Setup chunk
Setup reticulate
knitr::opts_chunk$set(fig.width = 8)
knitr::opts_knit$set(root.dir = normalizePath(".."))
knitr::opts_knit$get("root.dir")
[1] "/nas/groups/treutlein/USERS/tomasgomes/projects/liver_regen"
Load libraries
library(Seurat)
library(ggplot2)
library(ggridges)
library(dplyr)
library(destiny)
library(fdrtool)
library(gam)
library(gamclass)
library(dtw)
library(data.table)
library(clusterProfiler)
library(ReactomePA)
Function for cross-validation
CVgam = function (formula, data, nfold = 10, debug.level = 0, method = "glm.fit",
printit = TRUE, cvparts = NULL, gamma = 1, seed = 29){
# modified from the `gamclass` package to use gam::gam
if (is.null(cvparts)) {
set.seed(seed)
cvparts <- sample(1:nfold, nrow(data), replace = TRUE)
}
folds <- unique(cvparts)
khat <- hat <- numeric(nrow(data))
scale.gam <- summary(gam::gam(formula, data = data, method = method))$scale
for (i in folds) {
trainrows <- cvparts != i
testrows <- cvparts == i
elev.gam <- gam::gam(formula, data = data[trainrows, ], method = method,
gamma = gamma)
hat[testrows] <- predict(elev.gam, newdata = data[testrows,
], select = TRUE)
res <- residuals(elev.gam)
}
y <- eval(formula[[2]], envir = as.data.frame(data))
res <- y - hat
cvscale <- sum(res^2)/length(res)
prntvec <- c(GAMscale = scale.gam, `CV-mse-GAM ` = cvscale)
if (printit)
print(round(prntvec, 4))
invisible(list(fitted = hat, resid = res, cvscale = cvscale,
scale.gam = scale.gam))
}
Load data (from all cells)
CVgam = function (formula, data, nfold = 10, debug.level = 0, method = "glm.fit",
printit = TRUE, cvparts = NULL, gamma = 1, seed = 29){
# modified from the `gamclass` package to use gam::gam
if (is.null(cvparts)) {
set.seed(seed)
cvparts <- sample(1:nfold, nrow(data), replace = TRUE)
}
folds <- unique(cvparts)
khat <- hat <- numeric(nrow(data))
scale.gam <- summary(gam::gam(formula, data = data, method = method))$scale
for (i in folds) {
trainrows <- cvparts != i
testrows <- cvparts == i
elev.gam <- gam::gam(formula, data = data[trainrows, ], method = method,
gamma = gamma)
hat[testrows] <- predict(elev.gam, newdata = data[testrows,
], select = TRUE)
res <- residuals(elev.gam)
}
y <- eval(formula[[2]], envir = as.data.frame(data))
res <- y - hat
cvscale <- sum(res^2)/length(res)
prntvec <- c(GAMscale = scale.gam, `CV-mse-GAM ` = cvscale)
if (printit)
print(round(prntvec, 4))
invisible(list(fitted = hat, resid = res, cvscale = cvscale,
scale.gam = scale.gam))
}
Subset and process each condition
allcells_css = readRDS(file = "data/processed/allcells_css.RDS")
For healthy cells (which will serve as reference), do additional filtering and reprocessing
DimPlot(hep_cells$healthy, reduction = "umap", group.by = "unique_name")
outcells = (hep_cells$healthy@reductions$umap@cell.embeddings[,1]>(1) &
hep_cells$healthy@reductions$umap@cell.embeddings[,2]>5)
hep_cells$healthy = hep_cells$healthy[,!outcells]
hep_cells$healthy = suppressWarnings(SCTransform(hep_cells$healthy, do.correct.umi = T,
verbose = F,
vars.to.regress = c("unique_name","nCount_RNA"),
variable.features.rv.th = 1, seed.use = 1,
return.only.var.genes = F,
variable.features.n = NULL))
hep_cells$healthy = RunPCA(hep_cells$healthy, verbose = F)
hep_cells$healthy = RunUMAP(hep_cells$healthy, dims = 1:15, verbose = F)
DimPlot(hep_cells$healthy, reduction = "umap", group.by = "unique_name")
hep_cells$healthy = FindNeighbors(hep_cells$healthy, reduction = "pca", dims = 1:15,
force.recalc = T, graph.name = "hep")
hep_cells$healthy = FindClusters(hep_cells$healthy, graph.name = "hep", algorithm = 4,
resolution = seq(0.1,0.8,0.1), verbose = F)
DimPlot(hep_cells$healthy, reduction = "umap", group.by = "hep_res.0.3")
hep_cells$healthy = SetIdent(hep_cells$healthy, value = "hep_res.0.6")
mk_hep_h = FindAllMarkers(hep_cells$healthy, assay = "SCT")
saveRDS(hep_cells$healthy, file = "results/zonation_healthy/hep_healthy_srat.RDS")
Save hepatocytes
saveRDS(hep_cells, file="results/zonation_cond/hep_cells.RDS")
Plot expression of zonation markers
gfresh = c("CYP1A2", "CYP2E1", "GLUL", "CYP3A4", "G0S2", "APOC1", "UGT2B17", "BCHE", # central
"SAA1", "HAMP", "SAA2", "CPS1", "C3", "C1R", "MT-ND4", "MT-CO2") # portal
png("results/zonation_healthy/healthy_hepatocyte_zonation_markers_violin.png",
height = 1400, width = 1400)
VlnPlot(hep_cells$healthy, features = gfresh, group.by = "names_clusters",
pt.size = 0, sort = "increasing")
dev.off()
png("results/zonation_healthy/healthy_hepatocyte_zonation_markers_umap.png",
height = 1400, width = 1400)
FeaturePlot(hep_cells$healthy, features = gfresh, pt.size = 0.6)
dev.off()
Obtain Diffusion maps projection
set.seed(1)
dm = DiffusionMap(hep_cells$healthy@reductions$pca@cell.embeddings[,1:4],
rotate = T, n_eigs = 5)
dpt = DPT(dm)
plot(dpt)
Check expression of some genes
dpt_flat <- branch_divide(dpt, integer(0L))
root = min(dpt_flat@branch[, 1], na.rm = TRUE)
dpt_vals = destiny:::dpt_for_branch(dpt_flat, root)
df_dc = cbind(hep_cells$healthy@meta.data,
data.frame("DC1" = dpt@dm$DC1, "DC2" = dpt@dm$DC2, "DPT" = dpt_vals),
Matrix::t(hep_cells$healthy@assays$SCT@data[gfresh,]))
colnames(df_dc) = gsub("-", ".", colnames(df_dc), fixed = T)
plt_list = list()
for(n in c(gsub("-", ".", gfresh, fixed = T), "names_clusters", "unique_name")){
plt_list[[n]] = ggplot(df_dc, aes_string(x = "DC1", y = "DC2", colour = n))+
geom_point()+
theme_classic()
}
png("results/zonation_healthy/healthy_hepatocyte_zonation_markers_dc.png",
height = 900, width = 2800)
cowplot::plot_grid(plotlist = plt_list, nrow = 3, ncol = 6, align = "hv")
dev.off()
We’ll be using DC1 as the pseudotime. This was the best approximation possible to the zonation gradient based on diffusion maps
dpt_flat <- branch_divide(dpt, integer(0L))
root = min(dpt_flat@branch[, 1], na.rm = TRUE)
dpt_vals = destiny:::dpt_for_branch(dpt_flat, root)
df_dc = cbind(hep_cells$healthy@meta.data,
data.frame("DC1" = dpt@dm$DC1, "DC2" = dpt@dm$DC2, "DPT" = dpt_vals),
Matrix::t(hep_cells$healthy@assays$SCT@data[gfresh,]))
colnames(df_dc) = gsub("-", ".", colnames(df_dc), fixed = T)
plt_list = list()
for(n in c(gsub("-", ".", gfresh, fixed = T), "names_clusters", "unique_name")){
plt_list[[n]] = ggplot(df_dc, aes_string(x = "DC1", y = "DC2", colour = n))+
geom_point()+
theme_classic()
}
png("results/zonation_healthy/healthy_hepatocyte_zonation_markers_dc.png",
height = 900, width = 2800)
cowplot::plot_grid(plotlist = plt_list, nrow = 3, ncol = 6, align = "hv")
dev.off()
null device
1
Find genes varying along the pseudotime
pdf("results/zonation_healthy/hep_distributions_dc1.pdf", useDingbats = F,
height = 4, width = 9)
plt1 = ggplot(df_dc, aes(x = DC1, y = names_clusters,
group = names_clusters, fill = names_clusters))+
geom_density_ridges2(alpha = 0.4)+
theme_bw()+
theme(legend.position = "none")
plt2 = ggplot(df_dc, aes(x = DC1, y = unique_name,
group = unique_name, fill = unique_name))+
geom_density_ridges2(alpha = 0.4)+
theme_bw()+
theme(legend.position = "none")
cowplot::plot_grid(plt1, plt2, ncol = 2, align = "h")
Picking joint bandwidth of 0.00354
Picking joint bandwidth of 0.00457
dev.off()
null device
1
Now use the top genes to learn the pseudozonation coordinates and project the embolised and regenerating data
# will only use variable genes
hvg = hep_cells$healthy@assays$SCT@var.features
# Fit GAM for each gene using pseudotime as independent variable.
t = df_dc$DC1
t = scales::rescale(rank(df_dc$DC1), c(0.00001, 0.99999))
gene_fit_p = c()
gene_fit_vals = data.frame(row.names = 1:100)
for(i in 1:length(hvg)){
g = hvg[i]
z = hep_cells$healthy@assays$SCT@data[g,]
d = data.frame(z=z, t=t)
tmp = suppressMessages(gam(z ~ ns(t, df = 3), data=d))
# bins for model fitting
bb = seq(min(t), max(t), length.out = 100)
gene_fit_vals[,g] = suppressMessages(predict(tmp,
newdata = data.frame(t = bb)))
p = summary(tmp)$parametric.anova$`Pr(>F)`[1]
gene_fit_p = c(gene_fit_p, p)
names(gene_fit_p)[length(gene_fit_p)] = g
}
gene_fit_p = fdrtool::fdrtool(gene_fit_p, statistic="pvalue", plot=F,
verbose=F, cutoff.method="pct0", pct0=0.9)$qval
Calculate correlations
sig_genes = gene_fit_p[gene_fit_p<=0.05]
top_sig_genes = names(sig_genes[order(sig_genes, decreasing = F)][1:1000])
top_sig_genes = top_sig_genes[top_sig_genes %in% rownames(hep_cells$embolised@assays$SCT@data) &
top_sig_genes %in% rownames(hep_cells$regenerating@assays$SCT@data)]
data_gam_h = cbind(data.frame("DC1" = df_dc$DC1),
Matrix::t(hep_cells$health@assays$SCT@data[top_sig_genes,]))
#data_gam_h$DC1 = scales::rescale(-data_gam_h$DC1, c(0.00001, 0.99999))
data_gam_h$DC1 = scales::rescale(rank(data_gam_h$DC1), c(0.00001, 0.99999))
#bins_dc1 = cut(data_gam_h$DC1, 200)
#cells_use = unlist(tapply(1:length(bins_dc1), bins_dc1, function(x) sample(x, 12)))
gam_healthy = gam::gam(DC1 ~ ., data = data_gam_h, family = mgcv::betar(link = "logit", eps = 0.00001))
#gam_healthy = gam::gam(DC1 ~ ., data = data_gam_h)
hea_pred = predict(gam_healthy,
Matrix::t(hep_cells$healthy@assays$SCT@data[top_sig_genes,]), type = "response")
reg_pred = predict(gam_healthy,
Matrix::t(hep_cells$regenerating@assays$SCT@data[top_sig_genes,]), type = "response")
emb_pred = predict(gam_healthy,
Matrix::t(hep_cells$embolised@assays$SCT@data[top_sig_genes,]), type = "response")
plot(density(reg_pred), col = "salmon", ylim = c(0,3.25))
lines(density(emb_pred), col = "darkred")
lines(density(data_gam_h$DC1), col = "orange")
lines(density(hea_pred), col = "grey50")
legend(-0.073,32, legend = c("healthy (DC1)", "healthy (pred)",
"embolised (pred)", "regenerating (pred)"),
fill = c("green", "grey50", "red", "blue"))
plot_df = data.frame(vals = c(data_gam_h$DC1, emb_pred, reg_pred),
Condition = c(rep("healthy", length(data_gam_h$DC1)),
rep("embolised", length(emb_pred)),
rep("regenerating", length(reg_pred))),
Donor = c(hep_cells$healthy$Donor,
hep_cells$embolised$Donor, hep_cells$regenerating$Donor))
plot_df$bins100 = cut(plot_df$vals, 10)
plot_df$Condition = factor(plot_df$Condition, levels = c("healthy", "embolised", "regenerating"))
ggplot(plot_df, aes(x = bins100, fill = Condition))+
geom_bar(position = "fill")+
labs(x = "zonation (binned)", y = "proportion")+
scale_y_continuous(expand = c(0,0))+
scale_fill_manual(values = colcond)+
guides(fill = guide_legend(title.position = "top"))+
theme(axis.text.x = element_blank(),
legend.title.align = 0,
legend.position = "bottom")
ggplot(plot_df, aes(x = bins100, fill = Condition))+
facet_wrap(~Condition)+
geom_bar()+
labs(x = "zonation (binned)", y = "Number of cells")+
scale_y_continuous(expand = c(0,0))+
scale_fill_manual(values = colcond)+
coord_flip()+
guides(fill = guide_legend(title.position = "top"))+
theme(axis.text.y = element_blank(),
legend.title.align = 0,
legend.position = "bottom")
ggplot(plot_df, aes(x = vals, colour = Donor))+
facet_wrap(~Condition)+
geom_density()+
labs(x = "zonation (binned)", y = "proportion")+
theme_bw()+
theme(legend.title.align = 0,
legend.position = "bottom")
# data_gam_h$bins20 = cut(data_gam_h$DC1, 20)
# mean_bins = apply(data_gam_h[,2:501], 2, function(x) tapply(x, data_gam_h$bins20, mean))
# cor_reg_bins = cor(as.matrix(hep_cells$regenerating@assays$SCT@data[colnames(mean_bins),]),
# t(mean_bins), method = "sp")
# max_cor_regen = apply(cor_reg_bins, 1, which.max)
# testdf = data.frame(bins = max_cor_regen, dons = hep_cells$regenerating$Donor)
# ggplot(testdf, aes(x = bins, fill = dons))+geom_bar()+facet_wrap(~dons, scales = "free_y")
reg_df = data.frame("pt" = reg_pred, "don" = hep_cells$regenerating$Donor)
reg_df = cbind(reg_df, Matrix::t(hep_cells$regenerating@assays$SCT@data[gfresh,]))
ggplot(reg_df, aes(x = pt, y = SAA1, colour = don))+
geom_point()+
theme_classic()+
facet_wrap(~don)+
geom_hline(yintercept = 3)+
geom_vline(xintercept = c(0,1))
ggplot(reg_df, aes(x = SAA2, y = GLUL, colour = don))+
geom_point()+
theme_classic()+
facet_wrap(~don)
traj_list = list("healthy" = data_gam_h$DC1, "embolised" = emb_pred, "regenerating" = reg_pred)
Find varying genes - using all cells along the ranked pseudotime
cor_h = t(cor(traj_list$healthy, as.matrix(Matrix::t(hep_cells$healthy@assays$SCT@data)),
method = "sp"))
cor_emb = t(cor(traj_list$embolised, as.matrix(Matrix::t(hep_cells$embolised@assays$SCT@data)),
method = "sp"))
the standard deviation is zero
cor_reg = t(cor(traj_list$regenerating, as.matrix(Matrix::t(hep_cells$regenerating@assays$SCT@data)),
method = "sp"))
the standard deviation is zero
l = list(cor_h, cor_emb, cor_reg)
all_corr = Reduce(function(x, y) merge(x,y, by = "rn", all = T),
lapply(l, function(x) data.frame(x, rn = row.names(x))))
colnames(all_corr) = c("genes", "healthy", "embolised", "regenerating")
write.csv(all_corr, file = paste0("results/zonation_cond/hep_correlations_zonation_rank.csv"),
col.names = T, row.names = F, quote = F)
attempt to set 'col.names' ignored
Save trajectories and genes
qvals_list = list()
fits_list = list()
# get HVG from all conditions
hvg = unique(c(hep_cells[["healthy"]]@assays$SCT@var.features,
hep_cells[["embolised"]]@assays$SCT@var.features,
hep_cells[["regenerating"]]@assays$SCT@var.features))
hvg = hvg[hvg %in% rownames(hep_cells[["healthy"]]@assays$SCT@data) &
hvg %in% rownames(hep_cells[["embolised"]]@assays$SCT@data) &
hvg %in% rownames(hep_cells[["regenerating"]]@assays$SCT@data)]
for(cond in names(hep_cells)){
print(cond)
# Fit GAM for each gene using pseudotime as independent variable.
t = traj_list[[cond]]
gene_fit_p = c()
gene_fit_vals = data.frame(row.names = 1:100)
for(i in 1:length(hvg)){
g = hvg[i]
z = hep_cells[[cond]]@assays$SCT@data[g,]
d = data.frame(z=z, t=t)
tmp = suppressMessages(gam(z ~ ns(t, df = 3), data=d))
# bins for model fitting
bb = seq(min(t), max(t), length.out = 100)
gene_fit_vals[,g] = suppressMessages(predict(tmp,
newdata = data.frame(t = bb)))
p = summary(tmp)$parametric.anova$`Pr(>F)`[1]
gene_fit_p = c(gene_fit_p, p)
names(gene_fit_p)[length(gene_fit_p)] = g
}
if(sum(is.na(gene_fit_p))>0){
gene_fit_p[is.na(gene_fit_p)] = 1
}
gene_fit_p = fdrtool::fdrtool(gene_fit_p, statistic="pvalue", plot=F,
verbose=F, cutoff.method="pct0", pct0=0.9)$qval
qvals_list[[cond]] = gene_fit_p
fits_list[[cond]] = gene_fit_vals
}
[1] "healthy"
[1] "embolised"
[1] "regenerating"
Find varying genes - normalised by bins along the ranked pseudotime
save(fits_list, qvals_list, traj_list,
file = "results/zonation_cond/hep_traj_qval_fits_rank.RData")
Save trajectories and genes
save(fits_b_list, qvals_b_list, traj_list,
file = "results/zonation_cond/hep_binned_traj_qval_fits_rank.RData")
Find varying genes - using each donor as a replicate and calculating the average expression for each of 100 bins
save(fits_b_list, qvals_b_list, traj_list,
file = "results/zonation_cond/hep_binned_traj_qval_fits_rank.RData")
Save trajectories and genes
save(fits_db_list, qvals_db_list, traj_list,
file = "results/zonation_cond/hep_binnedDonor_traj_qval_fits_rank.RData")
Plot cell distributions in pseudotime
plot_df = data.frame("pseudotime" = c(traj_list$healthy, traj_list$embolised,
traj_list$regenerating),
"donor" = c(hep_cells$healthy@meta.data[,"Donor"],
hep_cells$embolised@meta.data[names(traj_list$embolised),"Donor"],
hep_cells$regenerating@meta.data[names(traj_list$regenerating),"Donor"]),
"cond" = c(rep("healthy", length(traj_list$healthy)),
rep("embolised", length(traj_list$embolised)),
rep("regenerating", length(traj_list$regenerating))))
pdf("results/zonation_cond/hep_cell_distributions_zonation.pdf", useDingbats = F,
width = 6, height = 5)
ggplot(plot_df, aes(x = donor, y = pseudotime, fill = cond))+
geom_boxplot(notch = T)+
theme_bw()
ggplot(plot_df, aes(x = pseudotime, group = donor, colour = donor))+
facet_wrap(~cond)+
geom_density()+
theme_bw()
ggplot(plot_df, aes(x = pseudotime, colour = cond))+
geom_density()+
theme_bw()
dev.off()
Do cross validation on the healthy trajectory
plot_df = data.frame("pseudotime" = c(traj_list$healthy, traj_list$embolised,
traj_list$regenerating),
"donor" = c(hep_cells$healthy@meta.data[,"Donor"],
hep_cells$embolised@meta.data[names(traj_list$embolised),"Donor"],
hep_cells$regenerating@meta.data[names(traj_list$regenerating),"Donor"]),
"cond" = c(rep("healthy", length(traj_list$healthy)),
rep("embolised", length(traj_list$embolised)),
rep("regenerating", length(traj_list$regenerating))))
pdf("results/zonation_cond/hep_cell_distributions_zonation.pdf", useDingbats = F,
width = 6, height = 5)
ggplot(plot_df, aes(x = donor, y = pseudotime, fill = cond))+
geom_boxplot(notch = T)+
theme_bw()
ggplot(plot_df, aes(x = pseudotime, group = donor, colour = donor))+
facet_wrap(~cond)+
geom_density()+
theme_bw()
ggplot(plot_df, aes(x = pseudotime, colour = cond))+
geom_density()+
theme_bw()
dev.off()
null device
1
Save zonation results
sig_genes = qvals_list$healthy[qvals_list$healthy<=0.05]
top_sig_genes = names(sig_genes[order(sig_genes, decreasing = F)][1:500])
data_gam_h = cbind(data.frame("DC1" = traj_list$healthy),
Matrix::t(hep_cells$health@assays$SCT@data[top_sig_genes,]))
colnames(data_gam_h) = gsub("-", "_", colnames(data_gam_h), fixed = T)
colnames(data_gam_h) = gsub(".", "_", colnames(data_gam_h), fixed = T)
cv_gam = CVgam(formula = DC1 ~ ., data = data_gam_h, nfold = 10, seed = 1, method = "glm.fit")
CV-mse-GAM
0.0106
pdf("results/zonation_healthy/hep_original_fitted_pseudotime.pdf", useDingbats = F,
width = 6, height = 5)
plot(data_gam_h$DC1, cv_gam$fitted, xlab = "Healthy pseudotime (DC1)",ylab = "Predicted Healthy",
main = paste0("Healthy original vs fitted pseudotime\nMSE: ", round(cv_gam$cvscale, 6)),
pch = 19, cex = 0.5)
abline(0,1, col = "blue", lwd = 3)
dev.off()
null device
1
Subset and process each condition (only LSEC)
df_h = data.table("zonation_pt" = traj_list$healthy)
df_h$zonation_int = cut(df_h$zonation_pt, 10)
df_e = data.table("zonation_pt" = traj_list$embolised)
df_e = df_h[df_e, on="zonation_pt", roll=Inf, rollends = T]
df_r = data.table("zonation_pt" = traj_list$regenerating)
df_r = df_h[df_r, on="zonation_pt", roll=Inf, rollends = T]
Error in colnamesInt(i, unname(on), check_dups = FALSE) :
argument specifying columns specify non existing column(s): cols[1]='zonation_pt'
For healthy cells (which will serve as reference), do additional filtering and reprocessing
DimPlot(end_cells$healthy, reduction = "umap", group.by = "allcells_clusters")
gfresh = c("CLEC1B", "CLEC4G", "LYVE1", "CD14", # central
"PECAM1", "AQP1", "VWF", "CD34", # portal
"ALB", "MKI67", "TRAC", "DCN")
FeaturePlot(end_cells$healthy, features = gfresh, reduction = "umap")
outcells = (end_cells$healthy@reductions$umap@cell.embeddings[,1]<(-4)) # this removes what seem to be central hepatocytes
end_cells$healthy = end_cells$healthy[,!outcells]
end_cells$healthy = suppressWarnings(SCTransform(end_cells$healthy, do.correct.umi = T,
verbose = F,
vars.to.regress = c("unique_name","nCount_RNA"),
variable.features.rv.th = 1, seed.use = 1,
return.only.var.genes = F,
variable.features.n = NULL))
end_cells$healthy = RunPCA(end_cells$healthy, verbose = F)
end_cells$healthy = RunUMAP(end_cells$healthy, dims = 1:15, verbose = F)
DimPlot(end_cells$healthy, reduction = "umap", group.by = "unique_name")
end_cells$healthy = FindNeighbors(end_cells$healthy, reduction = "pca", dims = 1:15,
force.recalc = T, graph.name = "end")
end_cells$healthy = FindClusters(end_cells$healthy, graph.name = "end", algorithm = 2,
resolution = seq(0.1,0.8,0.1), verbose = F)
DimPlot(end_cells$healthy, reduction = "umap", group.by = "end_res.0.2")
end_cells$healthy = SetIdent(end_cells$healthy, value = "end_res.0.2")
mk_end_h = FindAllMarkers(end_cells$healthy, assay = "SCT")
saveRDS(end_cells$healthy, file = "results/zonation_healthy/end_healthy_srat.RDS")
Save endothelial cells
saveRDS(end_cells, file="results/zonation_cond/end_cells.RDS")
Plot expression of zonation markers
gfresh = c("CLEC1B", "CLEC4G", "LYVE1", "CD14", # central
"PECAM1", "AQP1", "VWF", "CD34") # portal
png("results/zonation_healthy/healthy_hepatocyte_zonation_markers_violin.png",
height = 1400, width = 1400)
VlnPlot(end_cells$healthy, features = gfresh, group.by = "names_clusters",
pt.size = 0, sort = "increasing")
dev.off()
png("results/zonation_healthy/healthy_hepatocyte_zonation_markers_umap.png",
height = 1400, width = 1400)
FeaturePlot(end_cells$healthy, features = gfresh, pt.size = 0.6)
dev.off()
Obtain Diffusion maps projection
set.seed(1)
dm = DiffusionMap(end_cells$healthy@reductions$pca@cell.embeddings[,1:4],
rotate = T, n_eigs = 5)
dpt = DPT(dm, tips = c(2382, 1379, 424))
plot(dpt, col_by = "DPT424")
plot(dpt, col_by = "branch")
dm = DiffusionMap(end_cells$healthy@reductions$pca@cell.embeddings[dpt$DPT424<15,1:4],
rotate = T, n_eigs = 5)
dpt = DPT(dm)
plot(dpt, col_by = "DPT363")
end_cells$healthy = end_cells$healthy[,names(dpt@dm$DC1)]
Check expression of some genes
set.seed(1)
dm = DiffusionMap(end_cells$healthy@reductions$pca@cell.embeddings[,1:4],
rotate = T, n_eigs = 5)
dpt = DPT(dm, tips = c(2382, 1379, 424))
plot(dpt, col_by = "DPT424")
plot(dpt, col_by = "branch")
dm = DiffusionMap(end_cells$healthy@reductions$pca@cell.embeddings[dpt$DPT424<15,1:4],
rotate = T, n_eigs = 5)
dpt = DPT(dm)
plot(dpt, col_by = "DPT363")
end_cells$healthy = end_cells$healthy[,names(dpt@dm$DC1)]
We’ll be using DPT363 as the pseudotime. This was the best approximation possible to the zonation gradient based on diffusion maps
pdf("results/zonation_healthy/end_distributions_DPT363.pdf", useDingbats = F,
height = 4, width = 9)
plt1 = ggplot(df_dc, aes(x = DPT, y = names_clusters,
group = names_clusters, fill = names_clusters))+
geom_density_ridges2(alpha = 0.4)+
theme_bw()+
theme(legend.position = "none")
plt2 = ggplot(df_dc, aes(x = DPT, y = unique_name,
group = unique_name, fill = unique_name))+
geom_density_ridges2(alpha = 0.4)+
theme_bw()+
theme(legend.position = "none")
cowplot::plot_grid(plt1, plt2, ncol = 2, align = "h")
dev.off()
Find genes varying along the pseudotime
# will only use variable genes
hvg = end_cells$healthy@assays$SCT@var.features
# Fit GAM for each gene using pseudotime as independent variable.
t = scales::rescale(rank(-df_dc$DPT), c(0.00001, 0.99999))
gene_fit_p = c()
gene_fit_vals = data.frame(row.names = 1:100)
for(i in 1:length(hvg)){
g = hvg[i]
z = end_cells$healthy@assays$SCT@data[g,]
d = data.frame(z=z, t=t)
tmp = suppressMessages(gam(z ~ ns(t, df = 3), data=d))
# bins for model fitting
bb = seq(min(t), max(t), length.out = 100)
gene_fit_vals[,g] = suppressMessages(predict(tmp,
newdata = data.frame(t = bb)))
p = summary(tmp)$parametric.anova$`Pr(>F)`[1]
gene_fit_p = c(gene_fit_p, p)
names(gene_fit_p)[length(gene_fit_p)] = g
}
gene_fit_p = fdrtool::fdrtool(gene_fit_p, statistic="pvalue", plot=F,
verbose=F, cutoff.method="pct0", pct0=0.9)$qval
First, clean the data from the other two conditions
# will only use variable genes
hvg = end_cells$healthy@assays$SCT@var.features
# Fit GAM for each gene using pseudotime as independent variable.
t = scales::rescale(rank(-df_dc$DPT), c(0.00001, 0.99999))
gene_fit_p = c()
gene_fit_vals = data.frame(row.names = 1:100)
for(i in 1:length(hvg)){
g = hvg[i]
z = end_cells$healthy@assays$SCT@data[g,]
d = data.frame(z=z, t=t)
tmp = suppressMessages(gam(z ~ ns(t, df = 3), data=d))
# bins for model fitting
bb = seq(min(t), max(t), length.out = 100)
gene_fit_vals[,g] = suppressMessages(predict(tmp,
newdata = data.frame(t = bb)))
p = summary(tmp)$parametric.anova$`Pr(>F)`[1]
gene_fit_p = c(gene_fit_p, p)
names(gene_fit_p)[length(gene_fit_p)] = g
}
gene_fit_p = fdrtool::fdrtool(gene_fit_p, statistic="pvalue", plot=F,
verbose=F, cutoff.method="pct0", pct0=0.9)$qval
Now use the top genes to learn the pseudozonation coordinates and project the embolised and regenerating data
end_cells$embolised = FindNeighbors(end_cells$embolised, reduction = "pca", dims = 1:20,
force.recalc = T, graph.name = "endo")
Computing nearest neighbor graph
Computing SNN
end_cells$embolised = FindClusters(end_cells$embolised, graph.name = "endo", algorithm = 2,
resolution = 0.8, verbose = F)
DimPlot(end_cells$embolised, reduction = "umap", group.by = "endo_res.0.8")
end_cells$embolised = SetIdent(end_cells$embolised, value = "endo_res.0.8")
mk_emb = FindAllMarkers(end_cells$embolised, assay = "SCT")
Calculating cluster 0
| | 0 % ~calculating
|+ | 1 % ~02s
|++ | 3 % ~02s
|+++ | 4 % ~03s
|+++ | 6 % ~02s
|++++ | 7 % ~02s
|+++++ | 9 % ~02s
|++++++ | 10% ~02s
|++++++ | 12% ~02s
|+++++++ | 13% ~02s
|++++++++ | 14% ~02s
|++++++++ | 16% ~02s
|+++++++++ | 17% ~02s
|++++++++++ | 19% ~02s
|+++++++++++ | 20% ~02s
|+++++++++++ | 22% ~02s
|++++++++++++ | 23% ~02s
|+++++++++++++ | 25% ~02s
|++++++++++++++ | 26% ~02s
|++++++++++++++ | 28% ~02s
|+++++++++++++++ | 29% ~02s
|++++++++++++++++ | 30% ~02s
|++++++++++++++++ | 32% ~02s
|+++++++++++++++++ | 33% ~02s
|++++++++++++++++++ | 35% ~02s
|+++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 38% ~02s
|++++++++++++++++++++ | 39% ~02s
|+++++++++++++++++++++ | 41% ~02s
|++++++++++++++++++++++ | 42% ~02s
|++++++++++++++++++++++ | 43% ~02s
|+++++++++++++++++++++++ | 45% ~02s
|++++++++++++++++++++++++ | 46% ~02s
|++++++++++++++++++++++++ | 48% ~01s
|+++++++++++++++++++++++++ | 49% ~01s
|++++++++++++++++++++++++++ | 51% ~01s
|+++++++++++++++++++++++++++ | 52% ~01s
|+++++++++++++++++++++++++++ | 54% ~01s
|++++++++++++++++++++++++++++ | 55% ~01s
|+++++++++++++++++++++++++++++ | 57% ~01s
|+++++++++++++++++++++++++++++ | 58% ~01s
|++++++++++++++++++++++++++++++ | 59% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|++++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|+++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|+++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Calculating cluster 1
| | 0 % ~calculating
|+ | 1 % ~04s
|++ | 3 % ~04s
|++ | 4 % ~04s
|+++ | 5 % ~04s
|++++ | 7 % ~04s
|++++ | 8 % ~04s
|+++++ | 9 % ~04s
|++++++ | 11% ~04s
|++++++ | 12% ~04s
|+++++++ | 13% ~04s
|++++++++ | 14% ~04s
|++++++++ | 16% ~03s
|+++++++++ | 17% ~03s
|++++++++++ | 18% ~03s
|++++++++++ | 20% ~03s
|+++++++++++ | 21% ~03s
|++++++++++++ | 22% ~03s
|++++++++++++ | 24% ~03s
|+++++++++++++ | 25% ~03s
|++++++++++++++ | 26% ~03s
|++++++++++++++ | 28% ~03s
|+++++++++++++++ | 29% ~03s
|++++++++++++++++ | 30% ~03s
|++++++++++++++++ | 32% ~03s
|+++++++++++++++++ | 33% ~03s
|++++++++++++++++++ | 34% ~03s
|++++++++++++++++++ | 36% ~03s
|+++++++++++++++++++ | 37% ~03s
|++++++++++++++++++++ | 38% ~03s
|++++++++++++++++++++ | 39% ~03s
|+++++++++++++++++++++ | 41% ~02s
|++++++++++++++++++++++ | 42% ~02s
|++++++++++++++++++++++ | 43% ~02s
|+++++++++++++++++++++++ | 45% ~02s
|++++++++++++++++++++++++ | 46% ~02s
|++++++++++++++++++++++++ | 47% ~02s
|+++++++++++++++++++++++++ | 49% ~02s
|+++++++++++++++++++++++++ | 50% ~02s
|++++++++++++++++++++++++++ | 51% ~02s
|+++++++++++++++++++++++++++ | 53% ~02s
|+++++++++++++++++++++++++++ | 54% ~02s
|++++++++++++++++++++++++++++ | 55% ~02s
|+++++++++++++++++++++++++++++ | 57% ~02s
|+++++++++++++++++++++++++++++ | 58% ~02s
|++++++++++++++++++++++++++++++ | 59% ~02s
|+++++++++++++++++++++++++++++++ | 61% ~02s
|+++++++++++++++++++++++++++++++ | 62% ~02s
|++++++++++++++++++++++++++++++++ | 63% ~02s
|+++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 66% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|+++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|+++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s
Calculating cluster 2
| | 0 % ~calculating
|+ | 1 % ~02s
|++ | 2 % ~02s
|++ | 3 % ~02s
|+++ | 4 % ~02s
|+++ | 5 % ~02s
|++++ | 6 % ~02s
|++++ | 7 % ~02s
|+++++ | 9 % ~02s
|+++++ | 10% ~02s
|++++++ | 11% ~02s
|++++++ | 12% ~02s
|+++++++ | 13% ~02s
|+++++++ | 14% ~02s
|++++++++ | 15% ~02s
|++++++++ | 16% ~02s
|+++++++++ | 17% ~02s
|++++++++++ | 18% ~02s
|++++++++++ | 19% ~02s
|+++++++++++ | 20% ~02s
|+++++++++++ | 21% ~02s
|++++++++++++ | 22% ~02s
|++++++++++++ | 23% ~02s
|+++++++++++++ | 24% ~02s
|+++++++++++++ | 26% ~02s
|++++++++++++++ | 27% ~02s
|++++++++++++++ | 28% ~02s
|+++++++++++++++ | 29% ~02s
|+++++++++++++++ | 30% ~02s
|++++++++++++++++ | 31% ~02s
|++++++++++++++++ | 32% ~02s
|+++++++++++++++++ | 33% ~02s
|++++++++++++++++++ | 34% ~02s
|++++++++++++++++++ | 35% ~02s
|+++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 37% ~01s
|++++++++++++++++++++ | 38% ~01s
|++++++++++++++++++++ | 39% ~01s
|+++++++++++++++++++++ | 40% ~01s
|+++++++++++++++++++++ | 41% ~01s
|++++++++++++++++++++++ | 43% ~01s
|++++++++++++++++++++++ | 44% ~01s
|+++++++++++++++++++++++ | 45% ~01s
|+++++++++++++++++++++++ | 46% ~01s
|++++++++++++++++++++++++ | 47% ~01s
|++++++++++++++++++++++++ | 48% ~01s
|+++++++++++++++++++++++++ | 49% ~01s
|+++++++++++++++++++++++++ | 50% ~01s
|++++++++++++++++++++++++++ | 51% ~01s
|+++++++++++++++++++++++++++ | 52% ~01s
|+++++++++++++++++++++++++++ | 53% ~01s
|++++++++++++++++++++++++++++ | 54% ~01s
|++++++++++++++++++++++++++++ | 55% ~01s
|+++++++++++++++++++++++++++++ | 56% ~01s
|+++++++++++++++++++++++++++++ | 57% ~01s
|++++++++++++++++++++++++++++++ | 59% ~01s
|++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|+++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 63% ~01s
|++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|+++++++++++++++++++++++++++++++++ | 66% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|+++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 69% ~01s
|++++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|+++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 73% ~01s
|++++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~00s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~00s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~00s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~00s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=02s
Calculating cluster 3
| | 0 % ~calculating
|+ | 1 % ~03s
|++ | 2 % ~03s
|++ | 3 % ~03s
|+++ | 4 % ~03s
|+++ | 5 % ~03s
|++++ | 7 % ~03s
|++++ | 8 % ~03s
|+++++ | 9 % ~02s
|+++++ | 10% ~02s
|++++++ | 11% ~02s
|++++++ | 12% ~02s
|+++++++ | 13% ~02s
|++++++++ | 14% ~02s
|++++++++ | 15% ~02s
|+++++++++ | 16% ~02s
|+++++++++ | 17% ~02s
|++++++++++ | 18% ~02s
|++++++++++ | 20% ~02s
|+++++++++++ | 21% ~02s
|+++++++++++ | 22% ~02s
|++++++++++++ | 23% ~02s
|++++++++++++ | 24% ~02s
|+++++++++++++ | 25% ~02s
|++++++++++++++ | 26% ~02s
|++++++++++++++ | 27% ~02s
|+++++++++++++++ | 28% ~02s
|+++++++++++++++ | 29% ~02s
|++++++++++++++++ | 30% ~02s
|++++++++++++++++ | 32% ~02s
|+++++++++++++++++ | 33% ~02s
|+++++++++++++++++ | 34% ~02s
|++++++++++++++++++ | 35% ~02s
|++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 37% ~02s
|++++++++++++++++++++ | 38% ~02s
|++++++++++++++++++++ | 39% ~02s
|+++++++++++++++++++++ | 40% ~02s
|+++++++++++++++++++++ | 41% ~02s
|++++++++++++++++++++++ | 42% ~01s
|++++++++++++++++++++++ | 43% ~01s
|+++++++++++++++++++++++ | 45% ~01s
|+++++++++++++++++++++++ | 46% ~01s
|++++++++++++++++++++++++ | 47% ~01s
|++++++++++++++++++++++++ | 48% ~01s
|+++++++++++++++++++++++++ | 49% ~01s
|+++++++++++++++++++++++++ | 50% ~01s
|++++++++++++++++++++++++++ | 51% ~01s
|+++++++++++++++++++++++++++ | 52% ~01s
|+++++++++++++++++++++++++++ | 53% ~01s
|++++++++++++++++++++++++++++ | 54% ~01s
|++++++++++++++++++++++++++++ | 55% ~01s
|+++++++++++++++++++++++++++++ | 57% ~01s
|+++++++++++++++++++++++++++++ | 58% ~01s
|++++++++++++++++++++++++++++++ | 59% ~01s
|++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|+++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 63% ~01s
|+++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|++++++++++++++++++++++++++++++++++ | 66% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|+++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 73% ~01s
|+++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Calculating cluster 4
| | 0 % ~calculating
|+ | 1 % ~04s
|++ | 2 % ~03s
|++ | 3 % ~03s
|+++ | 4 % ~03s
|+++ | 5 % ~03s
|++++ | 7 % ~03s
|++++ | 8 % ~03s
|+++++ | 9 % ~03s
|+++++ | 10% ~03s
|++++++ | 11% ~03s
|++++++ | 12% ~03s
|+++++++ | 13% ~03s
|++++++++ | 14% ~03s
|++++++++ | 15% ~03s
|+++++++++ | 16% ~03s
|+++++++++ | 17% ~03s
|++++++++++ | 18% ~03s
|++++++++++ | 20% ~03s
|+++++++++++ | 21% ~03s
|+++++++++++ | 22% ~03s
|++++++++++++ | 23% ~03s
|++++++++++++ | 24% ~03s
|+++++++++++++ | 25% ~02s
|++++++++++++++ | 26% ~02s
|++++++++++++++ | 27% ~02s
|+++++++++++++++ | 28% ~02s
|+++++++++++++++ | 29% ~02s
|++++++++++++++++ | 30% ~02s
|++++++++++++++++ | 32% ~02s
|+++++++++++++++++ | 33% ~02s
|+++++++++++++++++ | 34% ~02s
|++++++++++++++++++ | 35% ~02s
|++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 37% ~02s
|++++++++++++++++++++ | 38% ~02s
|++++++++++++++++++++ | 39% ~02s
|+++++++++++++++++++++ | 40% ~02s
|+++++++++++++++++++++ | 41% ~02s
|++++++++++++++++++++++ | 42% ~02s
|++++++++++++++++++++++ | 43% ~02s
|+++++++++++++++++++++++ | 45% ~02s
|+++++++++++++++++++++++ | 46% ~02s
|++++++++++++++++++++++++ | 47% ~02s
|++++++++++++++++++++++++ | 48% ~02s
|+++++++++++++++++++++++++ | 49% ~02s
|+++++++++++++++++++++++++ | 50% ~02s
|++++++++++++++++++++++++++ | 51% ~02s
|+++++++++++++++++++++++++++ | 52% ~02s
|+++++++++++++++++++++++++++ | 53% ~02s
|++++++++++++++++++++++++++++ | 54% ~02s
|++++++++++++++++++++++++++++ | 55% ~01s
|+++++++++++++++++++++++++++++ | 57% ~01s
|+++++++++++++++++++++++++++++ | 58% ~01s
|++++++++++++++++++++++++++++++ | 59% ~01s
|++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|+++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 63% ~01s
|+++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|++++++++++++++++++++++++++++++++++ | 66% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|+++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 73% ~01s
|+++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Calculating cluster 5
| | 0 % ~calculating
|+ | 1 % ~03s
|++ | 2 % ~03s
|++ | 3 % ~03s
|+++ | 4 % ~03s
|+++ | 5 % ~03s
|++++ | 6 % ~03s
|++++ | 7 % ~03s
|+++++ | 8 % ~02s
|+++++ | 9 % ~02s
|++++++ | 10% ~02s
|++++++ | 11% ~02s
|+++++++ | 12% ~02s
|+++++++ | 13% ~02s
|++++++++ | 14% ~02s
|++++++++ | 15% ~02s
|+++++++++ | 16% ~02s
|+++++++++ | 18% ~02s
|++++++++++ | 19% ~02s
|++++++++++ | 20% ~02s
|+++++++++++ | 21% ~02s
|+++++++++++ | 22% ~02s
|++++++++++++ | 23% ~02s
|++++++++++++ | 24% ~02s
|+++++++++++++ | 25% ~02s
|+++++++++++++ | 26% ~02s
|++++++++++++++ | 27% ~02s
|++++++++++++++ | 28% ~02s
|+++++++++++++++ | 29% ~02s
|+++++++++++++++ | 30% ~02s
|++++++++++++++++ | 31% ~02s
|++++++++++++++++ | 32% ~02s
|+++++++++++++++++ | 33% ~02s
|++++++++++++++++++ | 34% ~02s
|++++++++++++++++++ | 35% ~02s
|+++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 37% ~02s
|++++++++++++++++++++ | 38% ~02s
|++++++++++++++++++++ | 39% ~02s
|+++++++++++++++++++++ | 40% ~02s
|+++++++++++++++++++++ | 41% ~02s
|++++++++++++++++++++++ | 42% ~02s
|++++++++++++++++++++++ | 43% ~01s
|+++++++++++++++++++++++ | 44% ~01s
|+++++++++++++++++++++++ | 45% ~01s
|++++++++++++++++++++++++ | 46% ~01s
|++++++++++++++++++++++++ | 47% ~01s
|+++++++++++++++++++++++++ | 48% ~01s
|+++++++++++++++++++++++++ | 49% ~01s
|++++++++++++++++++++++++++ | 51% ~01s
|++++++++++++++++++++++++++ | 52% ~01s
|+++++++++++++++++++++++++++ | 53% ~01s
|+++++++++++++++++++++++++++ | 54% ~01s
|++++++++++++++++++++++++++++ | 55% ~01s
|++++++++++++++++++++++++++++ | 56% ~01s
|+++++++++++++++++++++++++++++ | 57% ~01s
|+++++++++++++++++++++++++++++ | 58% ~01s
|++++++++++++++++++++++++++++++ | 59% ~01s
|++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|+++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 63% ~01s
|++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|+++++++++++++++++++++++++++++++++ | 66% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|+++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 69% ~01s
|++++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|+++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 73% ~01s
|++++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Calculating cluster 6
| | 0 % ~calculating
|+ | 1 % ~03s
|++ | 3 % ~03s
|++ | 4 % ~03s
|+++ | 5 % ~03s
|++++ | 7 % ~03s
|++++ | 8 % ~02s
|+++++ | 9 % ~02s
|++++++ | 11% ~02s
|++++++ | 12% ~02s
|+++++++ | 13% ~02s
|++++++++ | 15% ~02s
|++++++++ | 16% ~02s
|+++++++++ | 17% ~02s
|++++++++++ | 19% ~02s
|++++++++++ | 20% ~02s
|+++++++++++ | 21% ~02s
|++++++++++++ | 23% ~02s
|++++++++++++ | 24% ~02s
|+++++++++++++ | 25% ~02s
|++++++++++++++ | 27% ~02s
|++++++++++++++ | 28% ~02s
|+++++++++++++++ | 29% ~02s
|++++++++++++++++ | 31% ~02s
|++++++++++++++++ | 32% ~02s
|+++++++++++++++++ | 33% ~02s
|++++++++++++++++++ | 35% ~02s
|++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 37% ~02s
|++++++++++++++++++++ | 39% ~02s
|++++++++++++++++++++ | 40% ~02s
|+++++++++++++++++++++ | 41% ~02s
|++++++++++++++++++++++ | 43% ~02s
|++++++++++++++++++++++ | 44% ~02s
|+++++++++++++++++++++++ | 45% ~02s
|++++++++++++++++++++++++ | 47% ~01s
|++++++++++++++++++++++++ | 48% ~01s
|+++++++++++++++++++++++++ | 49% ~01s
|++++++++++++++++++++++++++ | 51% ~01s
|++++++++++++++++++++++++++ | 52% ~01s
|+++++++++++++++++++++++++++ | 53% ~01s
|++++++++++++++++++++++++++++ | 55% ~01s
|++++++++++++++++++++++++++++ | 56% ~01s
|+++++++++++++++++++++++++++++ | 57% ~01s
|++++++++++++++++++++++++++++++ | 59% ~01s
|++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|++++++++++++++++++++++++++++++++ | 63% ~01s
|++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 69% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 73% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Calculating cluster 7
| | 0 % ~calculating
|+ | 1 % ~06s
|++ | 2 % ~06s
|++ | 3 % ~06s
|+++ | 4 % ~06s
|+++ | 5 % ~06s
|++++ | 6 % ~05s
|++++ | 7 % ~05s
|+++++ | 8 % ~05s
|+++++ | 9 % ~05s
|++++++ | 10% ~05s
|++++++ | 11% ~05s
|+++++++ | 12% ~05s
|+++++++ | 13% ~05s
|++++++++ | 14% ~05s
|++++++++ | 15% ~05s
|+++++++++ | 16% ~05s
|+++++++++ | 18% ~05s
|++++++++++ | 19% ~05s
|++++++++++ | 20% ~05s
|+++++++++++ | 21% ~05s
|+++++++++++ | 22% ~05s
|++++++++++++ | 23% ~05s
|++++++++++++ | 24% ~05s
|+++++++++++++ | 25% ~04s
|+++++++++++++ | 26% ~04s
|++++++++++++++ | 27% ~04s
|++++++++++++++ | 28% ~04s
|+++++++++++++++ | 29% ~04s
|+++++++++++++++ | 30% ~04s
|++++++++++++++++ | 31% ~04s
|++++++++++++++++ | 32% ~04s
|+++++++++++++++++ | 33% ~04s
|++++++++++++++++++ | 34% ~04s
|++++++++++++++++++ | 35% ~04s
|+++++++++++++++++++ | 36% ~04s
|+++++++++++++++++++ | 37% ~04s
|++++++++++++++++++++ | 38% ~04s
|++++++++++++++++++++ | 39% ~04s
|+++++++++++++++++++++ | 40% ~04s
|+++++++++++++++++++++ | 41% ~03s
|++++++++++++++++++++++ | 42% ~03s
|++++++++++++++++++++++ | 43% ~03s
|+++++++++++++++++++++++ | 44% ~03s
|+++++++++++++++++++++++ | 45% ~03s
|++++++++++++++++++++++++ | 46% ~03s
|++++++++++++++++++++++++ | 47% ~03s
|+++++++++++++++++++++++++ | 48% ~03s
|+++++++++++++++++++++++++ | 49% ~03s
|++++++++++++++++++++++++++ | 51% ~03s
|++++++++++++++++++++++++++ | 52% ~03s
|+++++++++++++++++++++++++++ | 53% ~03s
|+++++++++++++++++++++++++++ | 54% ~03s
|++++++++++++++++++++++++++++ | 55% ~03s
|++++++++++++++++++++++++++++ | 56% ~03s
|+++++++++++++++++++++++++++++ | 57% ~03s
|+++++++++++++++++++++++++++++ | 58% ~03s
|++++++++++++++++++++++++++++++ | 59% ~02s
|++++++++++++++++++++++++++++++ | 60% ~02s
|+++++++++++++++++++++++++++++++ | 61% ~02s
|+++++++++++++++++++++++++++++++ | 62% ~02s
|++++++++++++++++++++++++++++++++ | 63% ~02s
|++++++++++++++++++++++++++++++++ | 64% ~02s
|+++++++++++++++++++++++++++++++++ | 65% ~02s
|+++++++++++++++++++++++++++++++++ | 66% ~02s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|+++++++++++++++++++++++++++++++++++ | 68% ~02s
|+++++++++++++++++++++++++++++++++++ | 69% ~02s
|++++++++++++++++++++++++++++++++++++ | 70% ~02s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|+++++++++++++++++++++++++++++++++++++ | 72% ~02s
|+++++++++++++++++++++++++++++++++++++ | 73% ~02s
|++++++++++++++++++++++++++++++++++++++ | 74% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s
Calculating cluster 8
| | 0 % ~calculating
|+ | 1 % ~04s
|++ | 2 % ~04s
|++ | 3 % ~04s
|+++ | 4 % ~04s
|+++ | 5 % ~04s
|++++ | 6 % ~03s
|++++ | 7 % ~03s
|+++++ | 8 % ~03s
|+++++ | 9 % ~03s
|++++++ | 10% ~03s
|++++++ | 11% ~03s
|+++++++ | 12% ~03s
|+++++++ | 13% ~03s
|++++++++ | 14% ~03s
|++++++++ | 15% ~03s
|+++++++++ | 16% ~03s
|+++++++++ | 17% ~03s
|++++++++++ | 18% ~03s
|++++++++++ | 19% ~03s
|+++++++++++ | 20% ~03s
|+++++++++++ | 21% ~03s
|++++++++++++ | 22% ~03s
|++++++++++++ | 23% ~03s
|+++++++++++++ | 24% ~03s
|+++++++++++++ | 26% ~03s
|++++++++++++++ | 27% ~03s
|++++++++++++++ | 28% ~03s
|+++++++++++++++ | 29% ~03s
|+++++++++++++++ | 30% ~03s
|++++++++++++++++ | 31% ~03s
|++++++++++++++++ | 32% ~03s
|+++++++++++++++++ | 33% ~03s
|+++++++++++++++++ | 34% ~02s
|++++++++++++++++++ | 35% ~02s
|++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 37% ~02s
|+++++++++++++++++++ | 38% ~02s
|++++++++++++++++++++ | 39% ~02s
|++++++++++++++++++++ | 40% ~02s
|+++++++++++++++++++++ | 41% ~02s
|+++++++++++++++++++++ | 42% ~02s
|++++++++++++++++++++++ | 43% ~02s
|++++++++++++++++++++++ | 44% ~02s
|+++++++++++++++++++++++ | 45% ~02s
|+++++++++++++++++++++++ | 46% ~02s
|++++++++++++++++++++++++ | 47% ~02s
|++++++++++++++++++++++++ | 48% ~02s
|+++++++++++++++++++++++++ | 49% ~02s
|+++++++++++++++++++++++++ | 50% ~02s
|++++++++++++++++++++++++++ | 51% ~02s
|+++++++++++++++++++++++++++ | 52% ~02s
|+++++++++++++++++++++++++++ | 53% ~02s
|++++++++++++++++++++++++++++ | 54% ~02s
|++++++++++++++++++++++++++++ | 55% ~02s
|+++++++++++++++++++++++++++++ | 56% ~02s
|+++++++++++++++++++++++++++++ | 57% ~02s
|++++++++++++++++++++++++++++++ | 58% ~02s
|++++++++++++++++++++++++++++++ | 59% ~02s
|+++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|++++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 63% ~01s
|+++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|++++++++++++++++++++++++++++++++++ | 66% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|+++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 69% ~01s
|++++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|+++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 73% ~01s
|++++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s
Calculating cluster 9
| | 0 % ~calculating
|+ | 1 % ~03s
|++ | 2 % ~03s
|++ | 3 % ~03s
|+++ | 4 % ~03s
|+++ | 6 % ~03s
|++++ | 7 % ~03s
|++++ | 8 % ~03s
|+++++ | 9 % ~03s
|++++++ | 10% ~03s
|++++++ | 11% ~03s
|+++++++ | 12% ~03s
|+++++++ | 13% ~03s
|++++++++ | 15% ~03s
|++++++++ | 16% ~03s
|+++++++++ | 17% ~03s
|+++++++++ | 18% ~03s
|++++++++++ | 19% ~03s
|+++++++++++ | 20% ~03s
|+++++++++++ | 21% ~02s
|++++++++++++ | 22% ~02s
|++++++++++++ | 24% ~02s
|+++++++++++++ | 25% ~02s
|+++++++++++++ | 26% ~02s
|++++++++++++++ | 27% ~02s
|+++++++++++++++ | 28% ~02s
|+++++++++++++++ | 29% ~02s
|++++++++++++++++ | 30% ~02s
|++++++++++++++++ | 31% ~02s
|+++++++++++++++++ | 33% ~02s
|+++++++++++++++++ | 34% ~02s
|++++++++++++++++++ | 35% ~02s
|++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 37% ~02s
|++++++++++++++++++++ | 38% ~02s
|++++++++++++++++++++ | 39% ~02s
|+++++++++++++++++++++ | 40% ~02s
|+++++++++++++++++++++ | 42% ~02s
|++++++++++++++++++++++ | 43% ~02s
|++++++++++++++++++++++ | 44% ~02s
|+++++++++++++++++++++++ | 45% ~02s
|++++++++++++++++++++++++ | 46% ~02s
|++++++++++++++++++++++++ | 47% ~02s
|+++++++++++++++++++++++++ | 48% ~02s
|+++++++++++++++++++++++++ | 49% ~02s
|++++++++++++++++++++++++++ | 51% ~02s
|++++++++++++++++++++++++++ | 52% ~02s
|+++++++++++++++++++++++++++ | 53% ~02s
|+++++++++++++++++++++++++++ | 54% ~01s
|++++++++++++++++++++++++++++ | 55% ~01s
|+++++++++++++++++++++++++++++ | 56% ~01s
|+++++++++++++++++++++++++++++ | 57% ~01s
|++++++++++++++++++++++++++++++ | 58% ~01s
|++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|+++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 63% ~01s
|+++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|++++++++++++++++++++++++++++++++++ | 66% ~01s
|++++++++++++++++++++++++++++++++++ | 67% ~01s
|+++++++++++++++++++++++++++++++++++ | 69% ~01s
|+++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 73% ~01s
|++++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Calculating cluster 10
| | 0 % ~calculating
|+ | 1 % ~03s
|++ | 2 % ~03s
|++ | 4 % ~03s
|+++ | 5 % ~03s
|++++ | 6 % ~03s
|++++ | 8 % ~03s
|+++++ | 9 % ~03s
|+++++ | 10% ~02s
|++++++ | 11% ~02s
|+++++++ | 12% ~02s
|+++++++ | 14% ~02s
|++++++++ | 15% ~02s
|+++++++++ | 16% ~02s
|+++++++++ | 18% ~02s
|++++++++++ | 19% ~02s
|++++++++++ | 20% ~02s
|+++++++++++ | 21% ~02s
|++++++++++++ | 22% ~02s
|++++++++++++ | 24% ~02s
|+++++++++++++ | 25% ~02s
|++++++++++++++ | 26% ~02s
|++++++++++++++ | 28% ~02s
|+++++++++++++++ | 29% ~02s
|+++++++++++++++ | 30% ~02s
|++++++++++++++++ | 31% ~02s
|+++++++++++++++++ | 32% ~02s
|+++++++++++++++++ | 34% ~02s
|++++++++++++++++++ | 35% ~02s
|+++++++++++++++++++ | 36% ~02s
|+++++++++++++++++++ | 38% ~02s
|++++++++++++++++++++ | 39% ~02s
|++++++++++++++++++++ | 40% ~02s
|+++++++++++++++++++++ | 41% ~02s
|++++++++++++++++++++++ | 42% ~02s
|++++++++++++++++++++++ | 44% ~02s
|+++++++++++++++++++++++ | 45% ~01s
|++++++++++++++++++++++++ | 46% ~01s
|++++++++++++++++++++++++ | 48% ~01s
|+++++++++++++++++++++++++ | 49% ~01s
|+++++++++++++++++++++++++ | 50% ~01s
|++++++++++++++++++++++++++ | 51% ~01s
|+++++++++++++++++++++++++++ | 52% ~01s
|+++++++++++++++++++++++++++ | 54% ~01s
|++++++++++++++++++++++++++++ | 55% ~01s
|+++++++++++++++++++++++++++++ | 56% ~01s
|+++++++++++++++++++++++++++++ | 58% ~01s
|++++++++++++++++++++++++++++++ | 59% ~01s
|++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 61% ~01s
|++++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 64% ~01s
|+++++++++++++++++++++++++++++++++ | 65% ~01s
|++++++++++++++++++++++++++++++++++ | 66% ~01s
|++++++++++++++++++++++++++++++++++ | 68% ~01s
|+++++++++++++++++++++++++++++++++++ | 69% ~01s
|+++++++++++++++++++++++++++++++++++ | 70% ~01s
|++++++++++++++++++++++++++++++++++++ | 71% ~01s
|+++++++++++++++++++++++++++++++++++++ | 72% ~01s
|+++++++++++++++++++++++++++++++++++++ | 74% ~01s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Calculating cluster 11
| | 0 % ~calculating
|+ | 1 % ~07s
|++ | 2 % ~07s
|++ | 3 % ~06s
|+++ | 4 % ~06s
|+++ | 5 % ~06s
|++++ | 6 % ~06s
|++++ | 7 % ~06s
|+++++ | 8 % ~06s
|+++++ | 9 % ~05s
|++++++ | 10% ~05s
|++++++ | 11% ~05s
|+++++++ | 12% ~05s
|+++++++ | 14% ~05s
|++++++++ | 15% ~05s
|++++++++ | 16% ~05s
|+++++++++ | 17% ~05s
|+++++++++ | 18% ~05s
|++++++++++ | 19% ~05s
|++++++++++ | 20% ~05s
|+++++++++++ | 21% ~05s
|+++++++++++ | 22% ~05s
|++++++++++++ | 23% ~05s
|++++++++++++ | 24% ~05s
|+++++++++++++ | 25% ~05s
|++++++++++++++ | 26% ~05s
|++++++++++++++ | 27% ~05s
|+++++++++++++++ | 28% ~05s
|+++++++++++++++ | 29% ~05s
|++++++++++++++++ | 30% ~04s
|++++++++++++++++ | 31% ~04s
|+++++++++++++++++ | 32% ~04s
|+++++++++++++++++ | 33% ~04s
|++++++++++++++++++ | 34% ~04s
|++++++++++++++++++ | 35% ~04s
|+++++++++++++++++++ | 36% ~04s
|+++++++++++++++++++ | 38% ~04s
|++++++++++++++++++++ | 39% ~04s
|++++++++++++++++++++ | 40% ~04s
|+++++++++++++++++++++ | 41% ~04s
|+++++++++++++++++++++ | 42% ~04s
|++++++++++++++++++++++ | 43% ~03s
|++++++++++++++++++++++ | 44% ~03s
|+++++++++++++++++++++++ | 45% ~03s
|+++++++++++++++++++++++ | 46% ~03s
|++++++++++++++++++++++++ | 47% ~03s
|++++++++++++++++++++++++ | 48% ~03s
|+++++++++++++++++++++++++ | 49% ~03s
|+++++++++++++++++++++++++ | 50% ~03s
|++++++++++++++++++++++++++ | 51% ~03s
|+++++++++++++++++++++++++++ | 52% ~03s
|+++++++++++++++++++++++++++ | 53% ~03s
|++++++++++++++++++++++++++++ | 54% ~03s
|++++++++++++++++++++++++++++ | 55% ~03s
|+++++++++++++++++++++++++++++ | 56% ~03s
|+++++++++++++++++++++++++++++ | 57% ~03s
|++++++++++++++++++++++++++++++ | 58% ~03s
|++++++++++++++++++++++++++++++ | 59% ~02s
|+++++++++++++++++++++++++++++++ | 60% ~02s
|+++++++++++++++++++++++++++++++ | 61% ~02s
|++++++++++++++++++++++++++++++++ | 62% ~02s
|++++++++++++++++++++++++++++++++ | 64% ~02s
|+++++++++++++++++++++++++++++++++ | 65% ~02s
|+++++++++++++++++++++++++++++++++ | 66% ~02s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++ | 68% ~02s
|+++++++++++++++++++++++++++++++++++ | 69% ~02s
|+++++++++++++++++++++++++++++++++++ | 70% ~02s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|++++++++++++++++++++++++++++++++++++ | 72% ~02s
|+++++++++++++++++++++++++++++++++++++ | 73% ~02s
|+++++++++++++++++++++++++++++++++++++ | 74% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~01s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 78% ~01s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s
Calculating cluster 12
| | 0 % ~calculating
|+ | 1 % ~07s
|++ | 2 % ~07s
|++ | 3 % ~07s
|+++ | 4 % ~07s
|+++ | 5 % ~07s
|++++ | 6 % ~07s
|++++ | 7 % ~07s
|+++++ | 8 % ~07s
|+++++ | 9 % ~07s
|++++++ | 10% ~07s
|++++++ | 11% ~07s
|+++++++ | 12% ~06s
|+++++++ | 13% ~06s
|++++++++ | 14% ~06s
|++++++++ | 15% ~06s
|+++++++++ | 16% ~06s
|+++++++++ | 17% ~06s
|++++++++++ | 18% ~06s
|++++++++++ | 19% ~06s
|+++++++++++ | 20% ~06s
|+++++++++++ | 21% ~06s
|++++++++++++ | 22% ~06s
|++++++++++++ | 23% ~05s
|+++++++++++++ | 24% ~05s
|+++++++++++++ | 25% ~05s
|++++++++++++++ | 26% ~05s
|++++++++++++++ | 27% ~05s
|+++++++++++++++ | 28% ~05s
|+++++++++++++++ | 29% ~05s
|++++++++++++++++ | 30% ~05s
|++++++++++++++++ | 31% ~05s
|+++++++++++++++++ | 32% ~05s
|+++++++++++++++++ | 33% ~05s
|++++++++++++++++++ | 34% ~05s
|++++++++++++++++++ | 35% ~05s
|+++++++++++++++++++ | 36% ~05s
|+++++++++++++++++++ | 37% ~04s
|++++++++++++++++++++ | 38% ~04s
|++++++++++++++++++++ | 39% ~04s
|+++++++++++++++++++++ | 40% ~04s
|+++++++++++++++++++++ | 41% ~04s
|++++++++++++++++++++++ | 42% ~04s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++ | 44% ~04s
|+++++++++++++++++++++++ | 45% ~04s
|++++++++++++++++++++++++ | 46% ~04s
|++++++++++++++++++++++++ | 47% ~04s
|+++++++++++++++++++++++++ | 48% ~04s
|+++++++++++++++++++++++++ | 49% ~04s
|++++++++++++++++++++++++++ | 51% ~04s
|++++++++++++++++++++++++++ | 52% ~03s
|+++++++++++++++++++++++++++ | 53% ~03s
|+++++++++++++++++++++++++++ | 54% ~03s
|++++++++++++++++++++++++++++ | 55% ~03s
|++++++++++++++++++++++++++++ | 56% ~03s
|+++++++++++++++++++++++++++++ | 57% ~03s
|+++++++++++++++++++++++++++++ | 58% ~03s
|++++++++++++++++++++++++++++++ | 59% ~03s
|++++++++++++++++++++++++++++++ | 60% ~03s
|+++++++++++++++++++++++++++++++ | 61% ~03s
|+++++++++++++++++++++++++++++++ | 62% ~03s
|++++++++++++++++++++++++++++++++ | 63% ~03s
|++++++++++++++++++++++++++++++++ | 64% ~03s
|+++++++++++++++++++++++++++++++++ | 65% ~03s
|+++++++++++++++++++++++++++++++++ | 66% ~02s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++ | 68% ~02s
|+++++++++++++++++++++++++++++++++++ | 69% ~02s
|+++++++++++++++++++++++++++++++++++ | 70% ~02s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|++++++++++++++++++++++++++++++++++++ | 72% ~02s
|+++++++++++++++++++++++++++++++++++++ | 73% ~02s
|+++++++++++++++++++++++++++++++++++++ | 74% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++ | 76% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=07s
outcells = (end_cells$embolised@reductions$umap@cell.embeddings[,2]>2.3 &
end_cells$embolised@reductions$umap@cell.embeddings[,1]<1) # this removes what seem to be central hepatocytes
end_cells$embolised = end_cells$embolised[,!outcells]
end_cells$embolised = end_cells$embolised[,end_cells$embolised@meta.data[,"endo_res.0.8"]!=12] # remove these extra stellate cells
end_cells$embolised = suppressWarnings(SCTransform(end_cells$embolised, do.correct.umi = T,
verbose = F,
vars.to.regress = c("unique_name","nCount_RNA"),
variable.features.rv.th = 1, seed.use = 1,
return.only.var.genes = F,
variable.features.n = NULL))
end_cells$embolised = RunPCA(end_cells$embolised, verbose = F)
end_cells$embolised = RunUMAP(end_cells$embolised, dims = 1:15, verbose = F)
DimPlot(end_cells$embolised, reduction = "umap", group.by = "unique_name")
outcells = (end_cells$regenerating@reductions$umap@cell.embeddings[,2]>2.7 &
end_cells$regenerating@reductions$umap@cell.embeddings[,1]<(-1)) # this removes what seem to be central hepatocytes
end_cells$regenerating = end_cells$regenerating[,!outcells]
end_cells$regenerating = suppressWarnings(SCTransform(end_cells$regenerating, do.correct.umi = T,
verbose = F,
vars.to.regress = c("unique_name","nCount_RNA"),
variable.features.rv.th = 1, seed.use = 1,
return.only.var.genes = F,
variable.features.n = NULL))
end_cells$regenerating = RunPCA(end_cells$regenerating, verbose = F)
end_cells$regenerating = RunUMAP(end_cells$regenerating, dims = 1:15, verbose = F)
DimPlot(end_cells$regenerating, reduction = "umap", group.by = "unique_name")
Calculate correlations
sig_genes = gene_fit_p[gene_fit_p<=0.05]
top_sig_genes = names(sig_genes[order(sig_genes, decreasing = F)][1:1000])
top_sig_genes = top_sig_genes[top_sig_genes %in% rownames(end_cells$embolised@assays$SCT@data) &
top_sig_genes %in% rownames(end_cells$regenerating@assays$SCT@data)]
data_gam_h = cbind(data.frame("DPT" = df_dc$DPT),
Matrix::t(end_cells$healthy@assays$SCT@data[top_sig_genes,]))
data_gam_h$DPT = scales::rescale(rank(-data_gam_h$DPT), c(0.00001, 0.99999))
gam_healthy = gam::gam(DPT ~ ., data = data_gam_h, family = mgcv::betar(link = "logit", eps = 0.00001))
hea_pred = predict(gam_healthy,
Matrix::t(end_cells$health@assays$SCT@data[top_sig_genes,]), type= "response")
reg_pred = predict(gam_healthy,
Matrix::t(end_cells$regenerating@assays$SCT@data[top_sig_genes,]), type= "response")
emb_pred = predict(gam_healthy,
Matrix::t(end_cells$embolised@assays$SCT@data[top_sig_genes,]), type= "response")
plot(density(data_gam_h$DPT), col = "green", ylim = c(0,2.75))
lines(density(reg_pred), col = "blue")
lines(density(emb_pred), col = "red")
lines(density(hea_pred), col = "grey50")
legend(-0.073,32, legend = c("healthy (DC1)", "healthy (pred)",
"embolised (pred)", "regenerating (pred)"),
fill = c("green", "grey50", "red", "blue"))
plot_df = data.frame(vals = c(data_gam_h$DPT, emb_pred, reg_pred),
Condition = c(rep("healthy", length(data_gam_h$DPT)),
rep("embolised", length(emb_pred)),
rep("regenerating", length(reg_pred))),
Donor = c(end_cells$healthy$Donor,
end_cells$embolised$Donor, end_cells$regenerating$Donor),
ct = c(end_cells$healthy$allcells_simp,
end_cells$embolised$allcells_simp, end_cells$regenerating$allcells_simp))
plot_df$bins100 = cut(plot_df$vals, 10)
plot_df$Condition = factor(plot_df$Condition, levels = c("healthy", "embolised", "regenerating"))
ggplot(plot_df, aes(x = bins100, fill = Condition))+
geom_bar(position = "fill")+
labs(x = "zonation (binned)", y = "proportion")+
scale_y_continuous(expand = c(0,0))+
scale_fill_manual(values = colcond)+
guides(fill = guide_legend(title.position = "top"))+
theme(axis.text.x = element_blank(),
legend.title.align = 0,
legend.position = "bottom")
ggplot(plot_df, aes(x = bins100, fill = Condition))+
facet_wrap(~Condition)+
geom_bar()+
labs(x = "zonation (binned)", y = "Number of cells")+
scale_y_continuous(expand = c(0,0))+
scale_fill_manual(values = colcond)+
coord_flip()+
guides(fill = guide_legend(title.position = "top"))+
theme(axis.text.y = element_blank(),
legend.title.align = 0,
legend.position = "bottom")
ggplot(plot_df, aes(x = vals, colour = Donor))+
facet_wrap(~Condition)+
geom_density()+
labs(x = "zonation (binned)", y = "proportion")+
theme_bw()+
theme(legend.title.align = 0,
legend.position = "bottom")
ggplot(plot_df, aes(x = vals, colour = ct))+
facet_wrap(~Condition)+
geom_density()+
labs(x = "zonation (binned)", y = "proportion")+
theme_bw()+
theme(legend.title.align = 0,
legend.position = "bottom")
reg_df = data.frame("pt" = reg_pred, "don" = end_cells$regenerating$Donor)
reg_df = cbind(reg_df, Matrix::t(end_cells$regenerating@assays$SCT@data[gfresh,]))
ggplot(reg_df, aes(x = pt, y = CLEC1B, colour = don))+
geom_point()+
theme_classic()+
facet_wrap(~don)+
geom_hline(yintercept = 3)+
geom_vline(xintercept = c(0,1))
ggplot(reg_df, aes(x = CLEC1B, y = LYVE1, colour = don))+
geom_point()+
theme_classic()+
facet_wrap(~don)
traj_list = list("healthy" = data_gam_h$DPT, "embolised" = emb_pred, "regenerating" = reg_pred)
Find varying genes - using all cells along the ranked pseudotime
cor_h = t(cor(data_gam_h$DPT, as.matrix(Matrix::t(end_cells$healthy@assays$SCT@data)),
method = "sp"))
cor_emb = t(cor(emb_pred, as.matrix(Matrix::t(end_cells$embolised@assays$SCT@data)),
method = "sp"))
cor_reg = t(cor(reg_pred, as.matrix(Matrix::t(end_cells$regenerating@assays$SCT@data)),
method = "sp"))
l = list(cor_h, cor_emb, cor_reg)
all_corr = Reduce(function(x, y) merge(x,y, by = "rn", all = T),
lapply(l, function(x) data.frame(x, rn = row.names(x))))
colnames(all_corr) = c("genes", "healthy", "embolised", "regenerating")
write.csv(all_corr, file = paste0("results/zonation_cond/end_correlations_zonation_rank.csv"),
col.names = T, row.names = F, quote = F)
attempt to set 'col.names' ignored
Save trajectories and genes
save(fits_list, qvals_list, traj_list,
file = "results/zonation_cond/end_traj_qval_fits_rank.RData")
Find varying genes - normalised by bins along the ranked pseudotime
qvals_b_list = list()
fits_b_list = list()
# get HVG from all conditions
hvg = unique(c(end_cells[["healthy"]]@assays$SCT@var.features,
end_cells[["embolised"]]@assays$SCT@var.features,
end_cells[["regenerating"]]@assays$SCT@var.features))
hvg = hvg[hvg %in% rownames(end_cells[["healthy"]]@assays$SCT@data) &
hvg %in% rownames(end_cells[["embolised"]]@assays$SCT@data) &
hvg %in% rownames(end_cells[["regenerating"]]@assays$SCT@data)]
for(cond in names(end_cells)){
print(cond)
set.seed(1)
# filtering the cells by bins
bins = cut(traj_list[[cond]], 100)
dons = end_cells[[cond]]$Donor
ndons = rowSums(table(bins, dons)>1)
ncells = rowSums(table(bins, dons))
sampleif = function(cells, bins, dons){
res = c()
for(d in unique(dons)){
for(b in unique(bins)){
x = cells[bins==b & dons==d]
if(length(x)>=30){
res = c(res, sample(x, 30, replace = F))
} else if(length(x)>=1 & ndons[b]>=2 & ncells[b]>=10){
res = c(res, x)
} else{
res = c(res, NULL)
}
}
}
return(res)
}
whichcells = sampleif(1:length(bins), bins, dons)
# Fit GAM for each gene using pseudotime as independent variable.
t = traj_list[[cond]][whichcells]
gene_fit_p = c()
gene_fit_vals = data.frame(row.names = 1:100)
for(i in 1:length(hvg)){
g = hvg[i]
z = end_cells[[cond]]@assays$SCT@data[g,whichcells]
d = data.frame(z=z, t=t)
tmp = suppressMessages(gam(z ~ ns(t, df = 3), data=d))
# bins for model fitting
bb = seq(min(t), max(t), length.out = 100)
gene_fit_vals[,g] = suppressMessages(predict(tmp,
newdata = data.frame(t = bb)))
p = summary(tmp)$parametric.anova$`Pr(>F)`[1]
gene_fit_p = c(gene_fit_p, p)
names(gene_fit_p)[length(gene_fit_p)] = g
}
if(sum(is.na(gene_fit_p))>0){
gene_fit_p[is.na(gene_fit_p)] = 1
}
gene_fit_p = fdrtool::fdrtool(gene_fit_p, statistic="pvalue", plot=F,
verbose=F, cutoff.method="pct0", pct0=0.9)$qval
qvals_b_list[[cond]] = gene_fit_p
fits_b_list[[cond]] = gene_fit_vals
}
Save trajectories and genes
save(fits_b_list, qvals_b_list, traj_list,
file = "results/zonation_cond/end_binned_traj_qval_fits_rank.RData")
Find varying genes - using each donor as a replicate and calculating the average expression for each of 100 bins
save(fits_b_list, qvals_b_list, traj_list,
file = "results/zonation_cond/end_binned_traj_qval_fits_rank.RData")
Save trajectories and genes
qvals_db_list = list()
fits_db_list = list()
# get HVG from all conditions
hvg = unique(c(end_cells[["healthy"]]@assays$SCT@var.features,
end_cells[["embolised"]]@assays$SCT@var.features,
end_cells[["regenerating"]]@assays$SCT@var.features))
hvg = hvg[hvg %in% rownames(end_cells[["healthy"]]@assays$SCT@data) &
hvg %in% rownames(end_cells[["embolised"]]@assays$SCT@data) &
hvg %in% rownames(end_cells[["regenerating"]]@assays$SCT@data)]
meta_df = data.frame("pt" = c(traj_list$healthy, traj_list$embolised, traj_list$regenerating),
"donors" = c(end_cells$healthy$Donor, end_cells$embolised$Donor,
end_cells$regenerating$Donor),
"cond" = c(end_cells$healthy$Condition, end_cells$embolised$Condition,
end_cells$regenerating$Condition))
meta_df$bins = cut(meta_df$pt, 100)
for(cond in unique(meta_df$cond)){
print(cond)
set.seed(1)
# Fit GAM for each gene
t = 1:100
sub_meta_df = meta_df[meta_df$cond==cond,]
gene_fit_p = c()
gene_fit_vals = data.frame(row.names = 1:100)
for(i in 1:length(hvg)){
g = hvg[i]
z = end_cells[[cond]]@assays$SCT@data[g,]
d = aggregate(z~bins+donors, data = cbind(sub_meta_df, z), FUN="mean")
cc = data.frame(table(sub_meta_df$bins, as.factor(as.character(sub_meta_df$donors))))
cc = cc[cc$Freq==0,]
colnames(cc) = colnames(d)
d = rbind(d, cc)
d = d[order(d$bins, decreasing = F),]
d$t = rep(t, each = length(unique(d$donors)))
tmp = suppressMessages(gam(z ~ ns(t, df = 3), data=d))
# bins for model fitting
bb = seq(min(t), max(t), length.out = 100)
gene_fit_vals[,g] = suppressMessages(predict(tmp,
newdata = data.frame(t = bb)))
p = summary(tmp)$parametric.anova$`Pr(>F)`[1]
gene_fit_p = c(gene_fit_p, p)
names(gene_fit_p)[length(gene_fit_p)] = g
}
if(sum(is.na(gene_fit_p))>0){
gene_fit_p[is.na(gene_fit_p)] = 1
}
gene_fit_p = fdrtool::fdrtool(gene_fit_p, statistic="pvalue", plot=F,
verbose=F, cutoff.method="pct0", pct0=0.9)$qval
qvals_db_list[[cond]] = gene_fit_p
fits_db_list[[cond]] = gene_fit_vals
}
[1] "healthy"
[1] "embolised"
[1] "regenerating"
Plot cell distributions in pseudotime
save(fits_db_list, qvals_db_list, traj_list,
file = "results/zonation_cond/end_binnedDonor_traj_qval_fits_rank.RData")
Do cross validation on the healthy trajectory
plot_df = data.frame("pseudotime" = c(traj_list$healthy, traj_list$embolised,
traj_list$regenerating),
"donor" = c(end_cells$healthy@meta.data[,"Donor"],
end_cells$embolised@meta.data[names(traj_list$embolised),"Donor"],
end_cells$regenerating@meta.data[names(traj_list$regenerating),"Donor"]),
"cond" = c(rep("healthy", length(traj_list$healthy)),
rep("embolised", length(traj_list$embolised)),
rep("regenerating", length(traj_list$regenerating))))
pdf("results/zonation_cond/end_cell_distributions_zonation.pdf", useDingbats = F,
width = 6, height = 5)
ggplot(plot_df, aes(x = donor, y = pseudotime, fill = cond))+
geom_boxplot(notch = T)+
theme_bw()
ggplot(plot_df, aes(x = pseudotime, group = donor, colour = donor))+
facet_wrap(~cond)+
geom_density()+
theme_bw()
ggplot(plot_df, aes(x = pseudotime, colour = cond))+
geom_density()+
theme_bw()
dev.off()
null device
1
Save zonation results
sig_genes = qvals_b_list$healthy[qvals_b_list$healthy<=0.05]
top_sig_genes = names(sig_genes[order(sig_genes, decreasing = F)][1:1000])
# 500 in this case actually seems to be one of the models with the lowest MSE
data_gam_h = cbind(data.frame("DPT" = traj_list$healthy),
Matrix::t(end_cells$healthy@assays$SCT@data[top_sig_genes,]))
colnames(data_gam_h) = gsub("-", "_", colnames(data_gam_h), fixed = T)
colnames(data_gam_h) = gsub(".", "_", colnames(data_gam_h), fixed = T)
cv_gam = CVgam(data = data_gam_h, formula = DPT ~ ., nfold = 10, seed = 1, method = "glm.fit")
CV-mse-GAM
0.0111
pdf("results/zonation_healthy/end_original_fitted_pseudotime.pdf", useDingbats = F,
width = 6, height = 5)
plot(data_gam_h$DPT, cv_gam$fitted, xlab = "Healthy pseudotime (DPT363)",ylab = "Predicted Healthy",
main = paste0("Healthy original vs fitted pseudotime\nMSE: ", round(cv_gam$cvscale, 6)),
pch = 19, cex = 0.5)
abline(0,1, col = "blue", lwd = 3)
dev.off()
null device
1
Load data
end_cells = readRDS(file = "results/zonation_cond/end_cells_zonation_rank.RDS")
hep_cells = readRDS(file = "results/zonation_cond/hep_cells_zonation_rank.RDS")
Load fits
end_cells = readRDS(file = "results/zonation_cond/end_cells_zonation_rank.RDS")
hep_cells = readRDS(file = "results/zonation_cond/hep_cells_zonation_rank.RDS")
Calculate expression per bins
load("results/zonation_cond/hep_binnedDonor_traj_qval_fits_rank.RData")
hep_fits_list = fits_db_list
hep_qvals_list = qvals_db_list
hep_traj = traj_list
load("results/zonation_cond/end_binnedDonor_traj_qval_fits_rank.RData")
end_fits_list = fits_db_list
end_qvals_list = qvals_db_list
end_traj = traj_list
Correlate binned expression
nbins = 100
dfhep_h = data.table("zonation_pt" = hep_traj$healthy)
dfhep_h$zonation_int = cut(dfhep_h$zonation_pt, nbins)
dfhep_e = data.table("zonation_pt" = hep_traj$embolised)
dfhep_e = dfhep_h[dfhep_e, on="zonation_pt", roll=Inf, rollends = T]
dfhep_r = data.table("zonation_pt" = hep_traj$regenerating)
dfhep_r = dfhep_h[dfhep_r, on="zonation_pt", roll=Inf, rollends = T]
hep_mean_bins = list(
healthy = apply(hep_cells$healthy@assays$SCT@data, 1,
function(x) tapply(x, dfhep_h$zonation_int, mean)),
embolised = apply(hep_cells$embolised@assays$SCT@data, 1,
function(x) tapply(x, dfhep_e$zonation_int, mean)),
regenerating = apply(hep_cells$regenerating@assays$SCT@data, 1,
function(x) tapply(x, dfhep_r$zonation_int, mean))
)
Correlate fitted expression
comb_cond = combn(names(end_cells), 2)
hep_cor_fit_list = list()
for(i in 1:ncol(comb_cond)){
c1 = comb_cond[1,i]
c2 = comb_cond[2,i]
genes = intersect(colnames(hep_fits_list[[c1]]),colnames(hep_fits_list[[c2]]))
for(g in genes){
hep_cor_fit_list[[paste0(c1,"_",c2)]][[g]] = cor(cbind(hep_fits_list[[c1]][,g],
hep_fits_list[[c2]][,g]),
method = "sp")[1,2]
}
}
hep_cor_fit_list = lapply(hep_cor_fit_list, data.frame)
hep_cor_fit_list = lapply(hep_cor_fit_list, function(x) cbind(rownames(x), x))
end_cor_fit_list = list()
for(i in 1:ncol(comb_cond)){
c1 = comb_cond[1,i]
c2 = comb_cond[2,i]
genes = intersect(colnames(end_fits_list[[c1]]),colnames(end_fits_list[[c2]]))
for(g in genes){
end_cor_fit_list[[paste0(c1,"_",c2)]][[g]] = cor(cbind(end_fits_list[[c1]][,g],
end_fits_list[[c2]][,g]),
method = "sp")[1,2]
}
}
end_cor_fit_list = lapply(end_cor_fit_list, data.frame)
end_cor_fit_list = lapply(end_cor_fit_list, function(x) cbind(rownames(x), x))
Save correlations
save(hep_cor_list, end_cor_list,
file = "results/zonation_cond/zonation_cor_cond_lists_rank.RData")
Filter pathways with ribosomal genes
save(hep_cor_list, end_cor_list,
file = "results/zonation_cond/zonation_cor_cond_lists_rank.RData")
Get the top genes for each condition
rpfilter = function(x){
return(!grepl("^RPL", x$geneID) & !grepl("^RPS", x$geneID) &
!grepl("/RPL", x$geneID, fixed = T) & !grepl("/RPS", x$geneID, fixed = T))
}
Find enriched GO Terms and Reactome pathways
# get signifficant genes showing variation (at least 0.1 range in the fitted values)
genes_list = list()
for(n in names(hep_fits_list)){
h_fc_sp = apply(hep_fits_list[[n]], 2, function(x) diff(range(x)))
genes_c = names(hep_qvals_list[[n]])[hep_qvals_list[[n]]<=0.05 &
names(hep_qvals_list[[n]]) %in% names(h_fc_sp)[h_fc_sp>=0.1]]
genes_c = genes_c[genes_c %in% names(hep_qvals_list[[n]])[order(hep_qvals_list[[n]], decreasing = F)]]
genes_list[[n]] = genes_c
}
Identify pathways more affected by one condition
anot_list = list()
for(n in names(terms_groups_list)){
terms_groups = terms_groups_list[[n]]
df_cor = df_cor_list[[n]]
# get the pathways for all genes, minus those with ribosomal genes
all_path = terms_groups$all[rpfilter(terms_groups$all),]
all_path = all_path[order(all_path$qvalue, decreasing = F),]
# get gene groups as a data frame
g_g = unlist(apply(df_cor[,c(1,3,4,5)], 1,
function(x) colnames(df_cor[,c(3,4,5)])[which(as.logical(x[2:4]))]))
gene_group = data.frame("gene" = names(g_g),
"group" = g_g)
# get fraction of genes from a condition that are present in a give pathway
n_g_group = list()
n_g_group[[levels(gene_group$group)[1]]] = c()
n_g_group[[levels(gene_group$group)[2]]] = c()
n_g_group[[levels(gene_group$group)[3]]] = c()
for(i in 1:nrow(all_path)){
genes_p = unlist(strsplit(all_path$geneID[i], "/"))
for(gg in unique(gene_group$group)){
ngg = sum(genes_p %in% gene_group$gene[gene_group$group==gg])/sum(gene_group$group==gg)
n_g_group[[gg]] = c(n_g_group[[gg]], ngg)
}
}
all_path = cbind(all_path, data.frame(n_g_group))
all_path$diff = all_path[,13] - all_path[,12] # add difference between cond and healthy
all_path$highest = colnames(all_path)[11:13][apply(all_path[,11:13], 1, which.max)]
# group the detected pathways by similarity
go_path = all_path[all_path$DB=="GO Term",]
mat_path = matrix(0, nrow(go_path), nrow(go_path))
for(i in 1:nrow(go_path)){
g1 = go_path[i,"geneID"]
g1 = unlist(strsplit(g1, "/"))
for(j in 1:nrow(go_path)){
g2 = go_path[j,"geneID"]
g2 = unlist(strsplit(g2, "/"))
mat_path[i,j] = length(intersect(g1, g2))/mean(c(length(g1), length(g2)))
}
}
cl_go = hclust(dist(mat_path), method = "ward.D2")
cl_go = cutree(cl_go, 7)
rownames(mat_path) = go_path$Description
anot = data.frame(row.names = go_path$Description,
"cl" = paste0("cl",cl_go),
"n" = go_path$Count,
"diff" = go_path$diff,
"qval" = -log10(go_path$qvalue),
"term" = go_path$Description)
pheatmap::pheatmap(mat_path, clustering_method = "ward.D2",
annotation_row = anot[,-5], show_rownames = F)
genes_per_gocl = tapply(anot$term, anot$cl,
function(x) unique(unlist(strsplit(go_path[go_path$Description %in% x,"geneID"], "/"))))
genes_cond_gocl = lapply(genes_per_gocl, function(x) lapply(genes_conds, function(y) sum(y %in% x)))
genes_cond_gocl = Reduce(rbind, genes_cond_gocl)
rownames(genes_cond_gocl) = paste0("cl", 1:7)
anot_list[[n]] = anot
}
Plot top terms
anot_list = list()
for(n in names(terms_groups_list)){
terms_groups = terms_groups_list[[n]]
df_cor = df_cor_list[[n]]
# get the pathways for all genes, minus those with ribosomal genes
all_path = terms_groups$all[rpfilter(terms_groups$all),]
all_path = all_path[order(all_path$qvalue, decreasing = F),]
# get gene groups as a data frame
g_g = unlist(apply(df_cor[,c(1,3,4,5)], 1,
function(x) colnames(df_cor[,c(3,4,5)])[which(as.logical(x[2:4]))]))
gene_group = data.frame("gene" = names(g_g),
"group" = g_g)
# get fraction of genes from a condition that are present in a give pathway
n_g_group = list()
n_g_group[[levels(gene_group$group)[1]]] = c()
n_g_group[[levels(gene_group$group)[2]]] = c()
n_g_group[[levels(gene_group$group)[3]]] = c()
for(i in 1:nrow(all_path)){
genes_p = unlist(strsplit(all_path$geneID[i], "/"))
for(gg in unique(gene_group$group)){
ngg = sum(genes_p %in% gene_group$gene[gene_group$group==gg])/sum(gene_group$group==gg)
n_g_group[[gg]] = c(n_g_group[[gg]], ngg)
}
}
all_path = cbind(all_path, data.frame(n_g_group))
all_path$diff = all_path[,13] - all_path[,12] # add difference between cond and healthy
all_path$highest = colnames(all_path)[11:13][apply(all_path[,11:13], 1, which.max)]
# group the detected pathways by similarity
go_path = all_path[all_path$DB=="GO Term",]
mat_path = matrix(0, nrow(go_path), nrow(go_path))
for(i in 1:nrow(go_path)){
g1 = go_path[i,"geneID"]
g1 = unlist(strsplit(g1, "/"))
for(j in 1:nrow(go_path)){
g2 = go_path[j,"geneID"]
g2 = unlist(strsplit(g2, "/"))
mat_path[i,j] = length(intersect(g1, g2))/mean(c(length(g1), length(g2)))
}
}
cl_go = hclust(dist(mat_path), method = "ward.D2")
cl_go = cutree(cl_go, 7)
rownames(mat_path) = go_path$Description
anot = data.frame(row.names = go_path$Description,
"cl" = paste0("cl",cl_go),
"n" = go_path$Count,
"diff" = go_path$diff,
"qval" = -log10(go_path$qvalue),
"term" = go_path$Description)
pheatmap::pheatmap(mat_path, clustering_method = "ward.D2",
annotation_row = anot[,-5], show_rownames = F)
genes_per_gocl = tapply(anot$term, anot$cl,
function(x) unique(unlist(strsplit(go_path[go_path$Description %in% x,"geneID"], "/"))))
genes_cond_gocl = lapply(genes_per_gocl, function(x) lapply(genes_conds, function(y) sum(y %in% x)))
genes_cond_gocl = Reduce(rbind, genes_cond_gocl)
rownames(genes_cond_gocl) = paste0("cl", 1:7)
anot_list[[n]] = anot
}
Plot some genes
pdf("results/zonation_cond/hep_top_go_terms_genes.pdf",
height = 6, width = 7, useDingbats = F)
for(n in names(terms_groups_list)){
terms_groups = terms_groups_list[[n]]
for(cond in names(terms_groups)){
plot_df = terms_groups[[cond]][rpfilter(terms_groups[[cond]]),]
plot_df = plot_df[order(plot_df$qvalue, decreasing = F),]
plot_df$Description = factor(plot_df$Description, levels = rev(plot_df$Description))
plt = ggplot(plot_df[1:10,], aes(x = Description, y = -log10(qvalue)))+
geom_bar(stat = "identity")+
coord_flip()+
labs(title = paste0(n, ": ", cond))+
theme_bw()
print(plt)
}
}
dev.off()
null device
1
pathway = "negative regulation of apoptotic signaling pathway"
unique(exp_plt_list$healthy_regenerating$regenerating[[pathway]]$variable)
gn = "ICAM1"
plot_df = exp_plt_list$healthy_regenerating$regenerating[[pathway]]
plot_df = plot_df[plot_df$variable==gn,]
c1 = "healthy"
c2 = "regenerating"
pw = "regenerating"
ggplot(plot_df, aes(x = xx, y = value, group = interaction(variable, cond)))+
geom_line(mapping = aes(colour = cond))+
labs(title = pathway, subtitle = paste0(c1, " vs ", c2, ", ", pw, " pathways"),
y = paste0("log(norm exp) ", gn))+
theme_classic()
Save important lists
save(terms_groups_list, anot_list, df_cor_list, genes_list, plot_list, exp_plt_list,
file = "results/zonation_cond/hep_go_results_rank.RData")
Get the top genes for each condition
save(terms_groups_list, anot_list, df_cor_list, genes_list, plot_list, exp_plt_list,
file = "results/zonation_cond/hep_go_results_rank.RData")
Find enriched GO Terms and Reactome pathways
# get signifficant genes showing variation (at least 0.1 range in the fitted values)
genes_list = list()
for(n in names(end_fits_list)){
h_fc_sp = apply(end_fits_list[[n]], 2, function(x) diff(range(x)))
genes_c = names(end_qvals_list[[n]])[end_qvals_list[[n]]<=0.05 &
names(end_qvals_list[[n]]) %in% names(h_fc_sp)[h_fc_sp>=0.1]]
genes_c = genes_c[genes_c %in% names(end_qvals_list[[n]])[order(end_qvals_list[[n]], decreasing = F)]]
genes_list[[n]] = genes_c
}
Identify pathways more affected by one condition
anot_list = list()
for(n in names(terms_groups_list)){
terms_groups = terms_groups_list[[n]]
df_cor = df_cor_list[[n]]
# get the pathways for all genes, minus those with ribosomal genes
all_path = terms_groups$all[rpfilter(terms_groups$all),]
all_path = all_path[order(all_path$qvalue, decreasing = F),]
# get gene groups as a data frame
g_g = unlist(apply(df_cor[,c(1,3,4,5)], 1,
function(x) colnames(df_cor[,c(3,4,5)])[which(as.logical(x[2:4]))]))
gene_group = data.frame("gene" = names(g_g),
"group" = g_g)
# get fraction of genes from a condition that are present in a give pathway
n_g_group = list()
n_g_group[[levels(gene_group$group)[1]]] = c()
n_g_group[[levels(gene_group$group)[2]]] = c()
n_g_group[[levels(gene_group$group)[3]]] = c()
for(i in 1:nrow(all_path)){
genes_p = unlist(strsplit(all_path$geneID[i], "/"))
for(gg in unique(gene_group$group)){
ngg = sum(genes_p %in% gene_group$gene[gene_group$group==gg])/sum(gene_group$group==gg)
n_g_group[[gg]] = c(n_g_group[[gg]], ngg)
}
}
all_path = cbind(all_path, data.frame(n_g_group))
all_path$diff = all_path[,13] - all_path[,12] # add difference between cond and healthy
all_path$highest = colnames(all_path)[11:13][apply(all_path[,11:13], 1, which.max)]
# group the detected pathways by similarity
go_path = all_path[all_path$DB=="GO Term",]
mat_path = matrix(0, nrow(go_path), nrow(go_path))
for(i in 1:nrow(go_path)){
g1 = go_path[i,"geneID"]
g1 = unlist(strsplit(g1, "/"))
for(j in 1:nrow(go_path)){
g2 = go_path[j,"geneID"]
g2 = unlist(strsplit(g2, "/"))
mat_path[i,j] = length(intersect(g1, g2))/mean(c(length(g1), length(g2)))
}
}
cl_go = hclust(dist(mat_path), method = "ward.D2")
cl_go = cutree(cl_go, 7)
rownames(mat_path) = go_path$Description
anot = data.frame(row.names = go_path$Description,
"cl" = paste0("cl",cl_go),
"n" = go_path$Count,
"diff" = go_path$diff,
"qval" = -log10(go_path$qvalue),
"term" = go_path$Description)
pheatmap::pheatmap(mat_path, clustering_method = "ward.D2",
annotation_row = anot[,-5], show_rownames = F)
genes_per_gocl = tapply(anot$term, anot$cl,
function(x) unique(unlist(strsplit(go_path[go_path$Description %in% x,"geneID"], "/"))))
genes_cond_gocl = lapply(genes_per_gocl, function(x) lapply(genes_conds, function(y) sum(y %in% x)))
genes_cond_gocl = Reduce(rbind, genes_cond_gocl)
rownames(genes_cond_gocl) = paste0("cl", 1:7)
anot_list[[n]] = anot
}
Plot top terms
anot_list = list()
for(n in names(terms_groups_list)){
terms_groups = terms_groups_list[[n]]
df_cor = df_cor_list[[n]]
# get the pathways for all genes, minus those with ribosomal genes
all_path = terms_groups$all[rpfilter(terms_groups$all),]
all_path = all_path[order(all_path$qvalue, decreasing = F),]
# get gene groups as a data frame
g_g = unlist(apply(df_cor[,c(1,3,4,5)], 1,
function(x) colnames(df_cor[,c(3,4,5)])[which(as.logical(x[2:4]))]))
gene_group = data.frame("gene" = names(g_g),
"group" = g_g)
# get fraction of genes from a condition that are present in a give pathway
n_g_group = list()
n_g_group[[levels(gene_group$group)[1]]] = c()
n_g_group[[levels(gene_group$group)[2]]] = c()
n_g_group[[levels(gene_group$group)[3]]] = c()
for(i in 1:nrow(all_path)){
genes_p = unlist(strsplit(all_path$geneID[i], "/"))
for(gg in unique(gene_group$group)){
ngg = sum(genes_p %in% gene_group$gene[gene_group$group==gg])/sum(gene_group$group==gg)
n_g_group[[gg]] = c(n_g_group[[gg]], ngg)
}
}
all_path = cbind(all_path, data.frame(n_g_group))
all_path$diff = all_path[,13] - all_path[,12] # add difference between cond and healthy
all_path$highest = colnames(all_path)[11:13][apply(all_path[,11:13], 1, which.max)]
# group the detected pathways by similarity
go_path = all_path[all_path$DB=="GO Term",]
mat_path = matrix(0, nrow(go_path), nrow(go_path))
for(i in 1:nrow(go_path)){
g1 = go_path[i,"geneID"]
g1 = unlist(strsplit(g1, "/"))
for(j in 1:nrow(go_path)){
g2 = go_path[j,"geneID"]
g2 = unlist(strsplit(g2, "/"))
mat_path[i,j] = length(intersect(g1, g2))/mean(c(length(g1), length(g2)))
}
}
cl_go = hclust(dist(mat_path), method = "ward.D2")
cl_go = cutree(cl_go, 7)
rownames(mat_path) = go_path$Description
anot = data.frame(row.names = go_path$Description,
"cl" = paste0("cl",cl_go),
"n" = go_path$Count,
"diff" = go_path$diff,
"qval" = -log10(go_path$qvalue),
"term" = go_path$Description)
pheatmap::pheatmap(mat_path, clustering_method = "ward.D2",
annotation_row = anot[,-5], show_rownames = F)
genes_per_gocl = tapply(anot$term, anot$cl,
function(x) unique(unlist(strsplit(go_path[go_path$Description %in% x,"geneID"], "/"))))
genes_cond_gocl = lapply(genes_per_gocl, function(x) lapply(genes_conds, function(y) sum(y %in% x)))
genes_cond_gocl = Reduce(rbind, genes_cond_gocl)
rownames(genes_cond_gocl) = paste0("cl", 1:7)
anot_list[[n]] = anot
}
Plot some genes
# plot all genes from pathway in grey, average in colour for cond and healthy
plot_list = list()
exp_plt_list = list()
for(comp in names(terms_groups_list)){
print(comp)
c1 = strsplit(comp, "_")[[1]][1]
c2 = strsplit(comp, "_")[[1]][2]
plot_list[[comp]] = list()
exp_plt_list[[comp]] = list()
for(pw in c(c1, c2)){
goterms = terms_groups_list[[comp]][[pw]]
goterms = goterms[goterms$DB=="GO Term" & rpfilter(goterms),]
exp_plt_list[[comp]][[pw]] = list()
plot_list[[comp]][[pw]] = list()
if(nrow(goterms)>0){
for(go in 1:nrow(goterms)){
genes_path = unlist(strsplit(goterms[go,"geneID"],"/"))
exp_healthy = reshape2::melt(end_fits_list[[c1]][,genes_path])
exp_healthy$xx = rep(1:100, length(genes_path))
exp_healthy$cond = c1
exp_cond = reshape2::melt(end_fits_list[[c2]][,genes_path])
exp_cond$xx = rep(1:100, length(genes_path))
exp_cond$cond = c2
exp_df = rbind(exp_healthy, exp_cond)
plt = ggplot(exp_df, aes(x = xx, y = value, group = interaction(variable, cond)))+
#geom_line(mapping = aes(colour = cond), alpha = 0.2)+
stat_smooth(mapping = aes(group = cond, colour = cond), show.legend = T)+
labs(title = goterms[go,"Description"],
subtitle = paste0(c1, " vs ", c2, ", ", pw, " pathways"))+
theme_classic()
plot_list[[comp]][[pw]][[goterms[go,"Description"]]] = plt
exp_plt_list[[comp]][[pw]][[goterms[go,"Description"]]] = exp_df
}
}
}
}
pathway = "cellular response to type I interferon"
unique(exp_plt_list$healthy_regenerating$regenerating[[pathway]]$variable)
gn = "EGR1"
plot_df = exp_plt_list$healthy_regenerating$regenerating[[pathway]]
plot_df = plot_df[plot_df$variable==gn,]
c1 = "healthy"
c2 = "regenerating"
pw = "regenerating"
ggplot(plot_df, aes(x = xx, y = value, group = interaction(variable, cond)))+
geom_line(mapping = aes(colour = cond))+
labs(title = pathway, subtitle = paste0(c1, " vs ", c2, ", ", pw, " pathways"),
y = paste0("log(norm exp) ", gn))+
theme_classic()
Save important lists
save(terms_groups_list, anot_list, df_cor_list, plot_list, exp_plt_list,
file = "results/zonation_cond/end_go_results_rank.RData")
Find genes with differencial variation - not very interesting since most genes seem to be showing a variation
save(terms_groups_list, anot_list, df_cor_list, plot_list, exp_plt_list,
file = "results/zonation_cond/end_go_results_rank.RData")
DTW condition vs condition
cond_comb = combn(names(qvals_list), 2)
for(i in 1:ncol(cond_comb)){
genes = intersect(names(qvals_list[[cond_comb[1,i]]])[qvals_list[[cond_comb[1,i]]]<=0.05],
names(qvals_list[[cond_comb[2,i]]])[qvals_list[[cond_comb[2,i]]]<=0.05])
sc_list = list()
for(cond in cond_comb[,i]){
gene_fit_vals = fits_list[[cond]]
gene_fit_vals_filt = t(gene_fit_vals[,genes])
sc_list[[cond]] = t(apply(gene_fit_vals_filt, 1, scale))
}
xxx = dtw(sc_list[[1]], sc_list[[2]], window.type = "itakura", dist.method = "DTW")
yyy = dtw(dist(t(sc_list[[1]]), t(sc_list[[2]]), method = "DTW"), window.type = "itakura")
}
source("aux_scripts/primate_cerebral_organoids_pt_alignment.r")
xcv = align_pt_traj(expr_ref = hep_cells$healthy@assays$SCT@data[genes,],
pt_ref = traj_list$healthy,
expr_query = hep_cells$embolised@assays$SCT@data[genes,],
pt_query = traj_list$embolised, num_breaks_ref = 100,
ref_name = "healthy", query_name = "embolised")
plot(traj_list$embolised, xcv$pt_query_aligned)
abline(0,1)
xcb = align_pt_traj(expr_ref = hep_cells$healthy@assays$SCT@data[genes,],
pt_ref = traj_list$healthy,
expr_query = hep_cells$regenerating@assays$SCT@data[genes,],
pt_query = traj_list$regenerating, num_breaks_ref = 100,
ref_name = "healthy", query_name = "regenerating")
plot(traj_list$regenerating, xcb$pt_query_aligned)
abline(0,1)
g = "CLEC1B"
plt_df = data.frame("traj" = c(traj_list$regenerating, traj_list$healthy),
"exp" = c(end_cells$regenerating@assays$SCT@data[g,names(traj_list$regenerating)],
end_cells$healthy@assays$SCT@data[g,]),
"cond" = c(rep("regenerating", length(traj_list$regenerating)),
rep("healthy", length(traj_list$healthy))),
"cl" = c(end_cells$regenerating@meta.data$allcells_simp,
end_cells$healthy@meta.data$allcells_simp))
plt_df = plt_df[order(plt_df$traj, decreasing = T),]
ggplot(plt_df, aes(x = traj, y = exp, group = cond, colour = cond))+
geom_point()+
stat_smooth()+
labs(title = g)+
theme_bw()
ggplot(plt_df, aes(x = traj, y = exp, group = cond, colour = cl, shape = cond))+
geom_point()+
stat_smooth()+
labs(title = g)+
theme_bw()
xxx = dtw::dtw(x = plt_df[plt_df$cond=="healthy","exp"], y = plt_df[plt_df$cond=="regenerating","exp"])
plot(xxx)
abline(0,1)
hti = cut_number(plt_df$traj[plt_df$cond=="healthy"], 100)
rti = cut_number(plt_df$traj[plt_df$cond=="regenerating"], 100)
df2 = data.frame("r" = tapply(plt_df$exp[plt_df$cond=="regenerating"], rti, mean),
"h" = tapply(plt_df$exp[plt_df$cond=="healthy"], hti, mean))
yyy = dtw::dtw(x = df2$h, y = df2$r)
plot(yyy)
abline(0,1)
Get GO Term enrichment
library(topGO)
cor_lists = list("hep" = hep_cor_list, "end" = end_cor_list)
cc = -0.25
go_enr_list = list()
for(gr in names(cor_lists)){
cor_list = cor_lists[[gr]]
go_enr_list[[gr]] = list()
for(comp in names(cor_list)){
testdf = cor_list[[comp]]
ncond = strsplit(comp, "_")[[1]]
go_enr_list[[gr]][[comp]] = list()
for(cond in ncond){
gsig = names(hep_qval[[cond]])[hep_qval[[cond]]<=0.05]
subtestdf = merge(testdf, hep_qval[[cond]], by = 0)
genes = subtestdf$X..i..
names(genes) = subtestdf[,1]
# GO enrichment
fsel = function(x) return(x<cc & subtestdf$y<=0.05)
sampleGOdata <- new("topGOdata",
description = "test", ontology = "BP",
allGenes = genes, geneSel = fsel,
nodeSize = 5, annot = annFUN.org,
mapping = "org.Hs.eg.db", ID = "symbol")
resultKS.elim <- runTest(sampleGOdata, algorithm = "elim",
statistic = "fisher")
resGO = GenTable(object = sampleGOdata, elimKS = resultKS.elim,
topNodes = resultKS.elim@geneData[4], numChar = 1000)
resGO$elimKS = score(resultKS.elim)[resGO$GO.ID]
resGO$cond = cond
# get genes
relevant_genes = sampleGOdata@allGenes[sampleGOdata@geneSelectionFun(sampleGOdata@allScores)]
goGenes = t(data.frame(lapply(genesInTerm(sampleGOdata), function(x) paste(x[x %in% relevant_genes], collapse = ", "))))
rownames(goGenes) = gsub(".", ":", rownames(goGenes), fixed = T)
colnames(goGenes) = "genes"
resGO = merge(resGO, goGenes, by.x = 1, by.y = 0, all.x = T)
if(cond==ncond[[1]]){
go_enr_list[[gr]][[comp]] = resGO[,c("GO.ID","Term","Annotated","Significant",
"Expected","elimKS","cond","genes")]
} else{
go_enr_list[[gr]][[comp]] = rbind(go_enr_list[[gr]][[comp]],
resGO[,c("GO.ID","Term","Annotated","Significant",
"Expected","elimKS","cond","genes")])
}
}
}
}
saveRDS(go_enr_list, file = "results/zonation_cond/go_enr_corsig_list.RDS")
gg = unique(c(names(end_qval$healthy)[order(end_qval$healthy, decreasing = F)][1:1000],
names(end_qval$regenerating)[order(end_qval$regenerating, decreasing = F)][1:1000]))
diff_fits = apply(end_fits$healthy[,gg], 2, rank)-apply(end_fits$regenerating[,gg], 2, rank)
cl = hclust(dist(t(diff_fits)), method = "ward.D")
ct = cutree(cl, 6)
df = data.frame("cor" = end_cor_fit_list$healthy_regenerating[gg,2],
"score" = apply(apply(t(diff_fits), 1, range),
2, function(x) sum(abs(x))))
diff_genes = rownames(df)[df$cor<0.25]
cl = hclust(dist(t(diff_fits)[diff_genes,]), method = "ward.D")
ct = cutree(cl, 4)
df = data.frame("cl" = factor(ct),
"cor" = 1-end_cor_fit_list$healthy_regenerating[diff_genes,2],
"score" = apply(apply(t(diff_fits)[diff_genes,], 1, range),
2, function(x) sum(abs(x))))
pheatmap::pheatmap(t(diff_fits)[diff_genes,], cluster_cols = F,
clustering_method = "ward.D", fontsize_row = 8, annotation_row = df)
plot(hep_fits$healthy[,"SAA2"])
plot(hep_fits$regenerating[,"SAA2"])
fsel = function(x) return(x<0.25 & names(genes) %in% gg)
fsel = function(x) return(names(genes) %in% gg)
genes = end_cor_fit_list$healthy_regenerating$X..i..
names(genes) = end_cor_fit_list$healthy_regenerating[,1]
genes = genes[!is.na(genes)]
eg = clusterProfiler::bitr(names(genes[fsel(genes)]), fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
eg_all = clusterProfiler::bitr(names(genes), fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
x <- ReactomePA::enrichPathway(gene=eg$ENTREZID,pvalueCutoff=0.05,
readable=T, universe = eg_all$ENTREZID)
clusterProfiler::dotplot(x)
head(as.data.frame(x))
ego3 <- clusterProfiler::enrichGO(gene = eg$ENTREZID, universe = eg_all$ENTREZID,
OrgDb = org.Hs.eg.db, ont = "BP", pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
pAdjustMethod = "BH", readable = T)
clusterProfiler::dotplot(ego3)
gg = unique(c(names(qvals_b_list$healthy)[order(qvals_b_list$healthy, decreasing = F)][1:1000],
names(qvals_b_list$regenerating)[order(qvals_b_list$regenerating, decreasing = F)][1:1000]))
diff_fits = apply(fits_b_list$healthy[,gg], 2, rank)-apply(fits_b_list$regenerating[,gg], 2, rank)
cl = hclust(dist(t(diff_fits)), method = "ward.D")
ct = cutree(cl, 6)
df = data.frame("cor" = hep_cor_fit_list$healthy_regenerating[gg,2],
"score" = apply(apply(t(diff_fits), 1, range),
2, function(x) sum(abs(x))))
df = df[complete.cases(df),]
diff_genes = rownames(df)[df$cor<0.25]
cl = hclust(dist(t(diff_fits)[diff_genes,]), method = "ward.D")
ct = cutree(cl, 4)
df = data.frame("cl" = factor(ct),
"cor" = 1-hep_cor_fit_list$healthy_regenerating[diff_genes,2],
"score" = apply(apply(t(diff_fits)[diff_genes,], 1, range),
2, function(x) sum(abs(x))))
pheatmap::pheatmap(t(diff_fits)[diff_genes,], cluster_cols = F,
clustering_method = "ward.D", fontsize_row = 8, annotation_row = df)
plot(hep_fits$healthy[,"CPS1"])
plot(hep_fits$regenerating[,"CPS1"])
genes_healthy = names(qvals_b_list$healthy)[order(qvals_b_list$healthy, decreasing = F)][qvals_b_list$healthy[order(qvals_b_list$healthy, decreasing = F)]<=0.05]
genes_regen = names(qvals_b_list$regenerating)[order(qvals_b_list$regenerating, decreasing = F)][qvals_b_list$regenerating[order(qvals_b_list$regenerating, decreasing = F)]<=0.05]
gg = unique(c(genes_healthy, genes_regen))
eg = clusterProfiler::bitr(gg, fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
eg_all = clusterProfiler::bitr(names(qvals_b_list$healthy), fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
reac = ReactomePA::enrichPathway(gene=eg$ENTREZID,pvalueCutoff=0.05,
readable=T, universe = eg_all$ENTREZID)
got = clusterProfiler::enrichGO(gene = eg$ENTREZID, universe = eg_all$ENTREZID,
OrgDb = org.Hs.eg.db, ont = "BP", pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
pAdjustMethod = "BH", readable = T)
all_terms = rbind(as.data.frame(reac), as.data.frame(got))
all_terms$DB = c(rep("Reactome", nrow(as.data.frame(reac))), rep("GO Term", nrow(as.data.frame(got))))
all_terms = all_terms[all_terms$qvalue<=0.05,]
all_terms_noribo = all_terms[!grepl("RPL", all_terms$geneID) & !grepl("RPS", all_terms$geneID),]
nhealthy = c()
nregen = c()
corhealthy = c()
corregen = c()
mean_traj_h = matrix(0, nrow(all_terms_noribo), 100)
mean_traj_r = matrix(0, nrow(all_terms_noribo), 100)
cor_mean = c()
for(i in 1:nrow(all_terms_noribo)){
genes_path = unlist(strsplit(all_terms_noribo[i,"geneID"],"/"))
in_healthy = genes_path[genes_path %in% genes_healthy]
in_regen = genes_path[genes_path %in% genes_regen]
nhealthy = c(nhealthy, length(in_healthy))
nregen = c(nregen, length(in_regen))
if(length(in_healthy)>1){
ch = cor(fits_b_list$healthy[,in_healthy], method = "sp")
corhealthy = c(corhealthy, median(ch[upper.tri(ch)]))
} else{
corhealthy = c(corhealthy,1)
}
if(length(in_regen)>1){
ch = cor(fits_b_list$regen[,in_regen], method = "sp")
corregen = c(corregen, median(ch[upper.tri(ch)]))
} else{
corregen = c(corregen,1)
}
mean_traj_h[i,] = if(length(in_healthy)>1){ rowMeans(fits_b_list$healthy[,in_healthy])} else if(length(in_healthy)>0){fits_b_list$healthy[,in_healthy]} else {rep(0,100)}
mean_traj_r[i,] = if(length(in_regen)>1){ rowMeans(fits_b_list$regenerating[,in_regen])} else if(length(in_regen)>0){fits_b_list$regenerating[,in_regen]} else {rep(0,100)}
cor_mean = c(cor_mean, cor(mean_traj_h[i,], mean_traj_r[i,]))
}
all_terms_noribo = cbind(all_terms_noribo, nhealthy, nregen, corhealthy, corregen, cor_mean)
rownames(mean_traj_h) = rownames(mean_traj_r) = all_terms_noribo$Description
View(all_terms_noribo[all_terms_noribo$corhealthy>=0.25 & all_terms_noribo$corregen>=0.25 &
all_terms_noribo$nhealthy>=2 & all_terms_noribo$nregen>=2,])
i = which(all_terms_noribo$Description=="mRNA Splicing - Minor Pathway")
genes_healthy = names(qvals_b_list$healthy)[qvals_b_list$healthy<=0.05]
genes_regen = names(qvals_b_list$regenerating)[qvals_b_list$regenerating<=0.05]
genes_conds = list("healthy" = setdiff(genes_healthy, genes_regen),
"regen" = setdiff(genes_regen, genes_healthy),
"common" = intersect(genes_healthy, genes_regen))
terms_list = list()
for(n in names(genes_conds)){
eg = clusterProfiler::bitr(genes_conds[[n]], fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
eg_all = clusterProfiler::bitr(names(qvals_b_list$healthy), fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
reac = ReactomePA::enrichPathway(gene=eg$ENTREZID,pvalueCutoff=0.05,
readable=T, universe = eg_all$ENTREZID)
got = clusterProfiler::enrichGO(gene = eg$ENTREZID, universe = eg_all$ENTREZID,
OrgDb = org.Hs.eg.db, ont = "BP", pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
pAdjustMethod = "BH", readable = T)
all_terms = rbind(as.data.frame(reac), as.data.frame(got))
all_terms$DB = c(rep("Reactome", nrow(as.data.frame(reac))), rep("GO Term", nrow(as.data.frame(got))))
all_terms = all_terms[all_terms$qvalue<=0.05,]
all_terms_noribo = all_terms[!grepl("RPL", all_terms$geneID) & !grepl("RPS", all_terms$geneID),]
nhealthy = c()
nregen = c()
corhealthy = c()
corregen = c()
mean_traj_h = matrix(0, nrow(all_terms_noribo), 100)
mean_traj_r = matrix(0, nrow(all_terms_noribo), 100)
cor_mean = c()
for(i in 1:nrow(all_terms_noribo)){
genes_path = unlist(strsplit(all_terms_noribo[i,"geneID"],"/"))
in_healthy = genes_path[genes_path %in% genes_healthy]
in_regen = genes_path[genes_path %in% genes_regen]
nhealthy = c(nhealthy, length(in_healthy))
nregen = c(nregen, length(in_regen))
if(length(in_healthy)>1){
ch = cor(fits_b_list$healthy[,in_healthy], method = "sp")
corhealthy = c(corhealthy, median(ch[upper.tri(ch)]))
} else{
corhealthy = c(corhealthy,0)
}
if(length(in_regen)>1){
ch = cor(fits_b_list$regen[,in_regen], method = "sp")
corregen = c(corregen, median(ch[upper.tri(ch)]))
} else{
corregen = c(corregen,0)
}
mean_traj_h[i,] = if(length(in_healthy)>1){ rowMeans(fits_b_list$healthy[,in_healthy])} else if(length(in_healthy)>0){fits_b_list$healthy[,in_healthy]} else {rep(0,100)}
mean_traj_r[i,] = if(length(in_regen)>1){ rowMeans(fits_b_list$regenerating[,in_regen])} else if(length(in_regen)>0){fits_b_list$regenerating[,in_regen]} else {rep(0,100)}
cor_mean = c(cor_mean, cor(mean_traj_h[i,], mean_traj_r[i,]))
}
all_terms_noribo = cbind(all_terms_noribo, nhealthy, nregen, corhealthy, corregen, cor_mean)
rownames(mean_traj_h) = rownames(mean_traj_r) = all_terms_noribo$Description
terms_list[[n]] = all_terms_noribo
}
cl = hclust(dist(t(diff_fits)), method = "ward.D")
ct = cutree(cl, 1)
term_list = list()
for(i in unique(ct)){
fsel = function(x) return(x<0.25 & names(genes) %in% names(ct)[ct==i])
genes = hep_cor_fit_list$healthy_regenerating$X..i..
names(genes) = hep_cor_fit_list$healthy_regenerating[,1]
genes = genes[!is.na(genes)]
eg = clusterProfiler::bitr(names(genes[fsel(genes)]), fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
eg_all = clusterProfiler::bitr(names(genes), fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
x <- ReactomePA::enrichPathway(gene=eg$ENTREZID,pvalueCutoff=0.05,
readable=T, universe = eg_all$ENTREZID)
term_list[[paste0("reac_",i)]] = x
ego3 <- clusterProfiler::enrichGO(gene = eg$ENTREZID, universe = eg_all$ENTREZID,
OrgDb = org.Hs.eg.db, ont = "BP", pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
pAdjustMethod = "BH", readable = T)
term_list[[paste0("go_",i)]] = ego3
}
genes = hep_cor_fit_list$healthy_regenerating$X..i..
names(genes) = hep_cor_fit_list$healthy_regenerating[,1]
genes = genes[!is.na(genes)]
# GO enrichment
fsel = function(x) return(x<0.25 & names(genes) %in% gg)
sampleGOdata <- new("topGOdata",
description = "test", ontology = "BP",
allGenes = genes, geneSel = fsel,
nodeSize = 5, annot = annFUN.org,
mapping = "org.Hs.eg.db", ID = "symbol")
resultKS.elim <- runTest(sampleGOdata, algorithm = "elim",
statistic = "fisher")
resGO = GenTable(object = sampleGOdata, elimKS = resultKS.elim,
topNodes = resultKS.elim@geneData[4], numChar = 1000)
resGO$elimKS = score(resultKS.elim)[resGO$GO.ID]
# get genes
relevant_genes = sampleGOdata@allGenes[sampleGOdata@geneSelectionFun(sampleGOdata@allScores)]
goGenes = t(data.frame(lapply(genesInTerm(sampleGOdata), function(x) paste(x[x %in% relevant_genes], collapse = ", "))))
rownames(goGenes) = gsub(".", ":", rownames(goGenes), fixed = T)
colnames(goGenes) = "genes"
resGO = merge(resGO, goGenes, by.x = 1, by.y = 0, all.x = T)
subresGO = resGO[!(grepl("RPL", resGO$genes) & grepl("RPS", resGO$genes)),]
sig_resGO = resGO[resGO$elimKS<=0.05,]
hprof_go = list()
rprof_go = list()
for(l in 1:nrow(sig_resGO)){
genes_go = unlist(strsplit(as.character(sig_resGO[l,"genes"]), ", "))
hprof_go[[sig_resGO[l,2]]] = t(hep_fits$healthy[,genes_go])
rprof_go[[sig_resGO[l,2]]] = t(hep_fits$regenerating[,genes_go])
}
hmax = unlist(lapply(hprof_go, function(x) which.max(colSums(x))))
rmax = unlist(lapply(rprof_go, function(x) which.max(colSums(x))))
names(hmax) = names(rmax) = sig_resGO$Term
sig_resGO = sig_resGO[order(sig_resGO$elimKS, decreasing = F),]
go_short_list = list()
for(i in 1:nrow(sig_resGO)){
g = unlist(strsplit(as.character(sig_resGO[i,"genes"]), ", "))
if(!any(g %in% unlist(go_short_list)) & length(g)>1){
go_short_list[[sig_resGO[i,2]]] = g
}
}
res = matrix(0, 1, 100)
for(n in names(go_short_list)){
if(length(go_short_list[[n]])>1){
cl = hclust(dist(t(diff_fits[,go_short_list[[n]]])), method = "ward.D")
res = rbind(res, t(diff_fits[,go_short_list[[n]]])[cl$order,])
}
}
res = res[-1,]
pheatmap::pheatmap(res, cluster_cols = F, cluster_rows = F, fontsize_row = 8, gaps_row = cumsum(unlist(lapply(go_short_list, length))))
sig_resGO = sig_resGO[order(sig_resGO$elimKS, decreasing = F),]
go_med = matrix(0, 1, 100)
for(i in 1:nrow(sig_resGO)){
g = unlist(strsplit(as.character(sig_resGO[i,"genes"]), ", "))
if(length(g)>1){
go_med = rbind(go_med, apply(t(diff_fits[,g]), 2, median))
rownames(go_med)[nrow(go_med)] = sig_resGO[i,2]
}
}
go_med = go_med[-1,]
eg = clusterProfiler::bitr(names(genes[fsel(genes)]), fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
eg_all = clusterProfiler::bitr(names(genes), fromType="SYMBOL",
toType="ENTREZID", OrgDb="org.Hs.eg.db")
x <- as.data.frame(ReactomePA::enrichPathway(gene=eg$ENTREZID,pvalueCutoff=0.05,
readable=T, universe = eg_all$ENTREZID))
head(as.data.frame(x))
go_med = matrix(0, 1, 100)
for(i in 1:nrow(x)){
g = unlist(strsplit(as.character(x[i,"geneID"]), "/"))
if(length(g)>1){
go_med = rbind(go_med, apply(t(diff_fits[,g]), 2, median))
rownames(go_med)[nrow(go_med)] = x[i,2]
}
}
go_med = go_med[-1,]
genes_reac = strsplit(x$geneID, "/")
names(genes_reac) = x$Description
genes_reac = reshape2::melt(genes_reac)
dat = genes_reac[!duplicated(genes_reac$value),]
dat = data.frame(row.names = dat[,1], "Reac" = dat[,2])
pheatmap::pheatmap(t(diff_fits)[diff_genes,], cluster_cols = F,
clustering_method = "ward.D", fontsize_row = 8, annotation_row = dat)
hprof_re = list()
rprof_re = list()
for(l in 1:nrow(x)){
genes_go = unlist(strsplit(as.character(x[l,"geneID"]), "/"))
hprof_re[[x[l,2]]] = t(hep_fits$healthy[,genes_go])
rprof_re[[x[l,2]]] = t(hep_fits$regenerating[,genes_go])
}
hmet = ggplot(reshape2::melt(t(apply(hprof_re$`Metallothioneins bind metals`, 1, scale))),
aes(x = Var2, y = value, group = Var1, colour = Var1))+
geom_line()+labs(title = "Healthy - Metallothioneins bind metals")
rmet = ggplot(reshape2::melt(t(apply(rprof_re$`Metallothioneins bind metals`, 1, scale))),
aes(x = Var2, y = value, group = Var1, colour = Var1))+
geom_line()+labs(title = "Regen - Metallothioneins bind metals")
rlip = ggplot(reshape2::melt(t(apply(rprof_re$`Plasma lipoprotein remodeling`, 1, scale))),
aes(x = Var2, y = value, group = Var1, colour = Var1))+
geom_line()+labs(title = "Regen - Plasma lipoprotein remodeling")
hlip = ggplot(reshape2::melt(t(apply(hprof_re$`Plasma lipoprotein remodeling`, 1, scale))),
aes(x = Var2, y = value, group = Var1, colour = Var1))+
geom_line()+labs(title = "Healthy - Plasma lipoprotein remodeling")
cowplot::plot_grid(hmet, rmet, hlip, rlip, ncol = 2, row = 2, align = "hv")
ego3 <- clusterProfiler::enrichGO(gene = eg$ENTREZID, universe = eg_all$ENTREZID,
OrgDb = org.Hs.eg.db, ont = "BP", pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
pAdjustMethod = "BH", readable = T)
genes_ent = merge(eg_all, genes, by.x = 1, by.y = 0)
tmp = genes_ent[,2]
genes_ent = genes_ent[,3]
names(genes_ent) = tmp
ego4 <- clusterProfiler::gseGO(geneList = genes_ent[order(genes_ent, decreasing = T)],
OrgDb = org.Hs.eg.db, ont = "BP", nPerm = 1000, minGSSize = 100,
maxGSSize = 500, pvalueCutoff = 0.05, verbose = FALSE)
plot_genes_fits = cbind(hep_fits$healthy[,c("SAA1", "HAMP", "CYP3A4", "GLUL", "CPS1", "ARG1")],
hep_fits$embolised[,c("SAA1", "HAMP", "CYP3A4", "GLUL", "CPS1", "ARG1")],
hep_fits$regenerating[,c("SAA1", "HAMP", "CYP3A4", "GLUL", "CPS1", "ARG1")])
plot_genes_fits = apply(plot_genes_fits, 2, c)
colnames(plot_genes_fits) = paste0(colnames(plot_genes_fits),
rep(c("_healthy", "_embolised", "_regenerating"), each = 6))
plot_genes_fits = reshape2::melt(plot_genes_fits)
plot_genes_fits$cond = unlist(lapply(strsplit(as.character(plot_genes_fits$Var2), "_"), function(x) x[2]))
plot_genes_fits$cond = factor(plot_genes_fits$cond, levels = c("healthy", "embolised", "regenerating"))
plot_genes_fits$genes = unlist(lapply(strsplit(as.character(plot_genes_fits$Var2), "_"), function(x) x[1]))
ggplot(plot_genes_fits, aes(x = Var1, y = value, colour = genes))+facet_wrap(~cond)+geom_line()
all_path = terms_groups$all[rpfilter(terms_groups$all),]
all_path = all_path[order(all_path$qvalue, decreasing = F),]
tab_go = reshape2::melt(lapply(df_cor$gene, function(g) all_path[which(grepl(g, all_path$geneID) & all_path$DB=="GO Term")[1],c("Description", "DB", "qvalue")][1,]))
tab_go$gene = df_cor$gene
tab_go = tab_go[,c(6,1,2,4)]
tab_go$de = apply(df_cor[,c(1,3,4,5)], 1, function(x) colnames(df_cor[,c(3,4,5)])[which(as.logical(x[2:4]))])
tab_go$de[nchar(tab_go$de)>10] = NA
tab_go$de = unlist(tab_go$de)
tab_go$corr = df_cor$corr
tab_re = reshape2::melt(lapply(df_cor$gene, function(g) all_path[which(grepl(g, all_path$geneID) & all_path$DB=="Reactome")[1],c("Description", "DB", "qvalue")][1,]))
tab_re$gene = df_cor$gene
tab_re = tab_re[,c(6,1,2,4)]
tab_re$de = apply(df_cor[,c(1,3,4,5)], 1, function(x) colnames(df_cor[,c(3,4,5)])[which(as.logical(x[2:4]))])
tab_re$de[nchar(tab_re$de)>10] = NA
tab_re$de = unlist(tab_re$de)
tab_re$corr = df_cor$corr
tab_all = rbind(tab_go, tab_re)
tab_all = tab_all[complete.cases(tab_all),]
xxx = table(tab_all$Description, tab_all$de)
xxx = xxx[rowSums(xxx)>2,]
xxx = xxx/rowSums(xxx)
View(apply(xxx, 1, function(x) ifelse(any(x>0.5), colnames(xxx)[x>0.5], NA)))
aaa = cbind(tapply(tab_all$corr, tab_all$Description, function(x) sum(x>=0.25)/length(x)),
tapply(tab_all$corr, tab_all$Description, function(x) length(x)),
tapply(tab_all$de, tab_all$Description, function(x) names(table(x))[which.max(table(x))]))
Calculate distances for all significant genes
for(cond in names(qvals_list)){
gene_fit_p = qvals_list[[cond]]
gene_fit_vals = fits_list[[cond]]
gene_fit_vals_filt = t(gene_fit_vals[,names(gene_fit_p)[gene_fit_p<=0.05]])
scale_fit = t(apply(gene_fit_vals_filt, 1, scale))
# dist
if(file.exists(paste0("results/zonation_cond/dtw_distance_end_", cond, "_sigGenes.RDS"))){
next
} else{
dd = dtw::dtwDist(scale_fit) # TAKES 4.5 HOURS
saveRDS(dd, paste0("results/zonation_cond/dtw_distance_end_", cond, "_sigGenes.RDS"))
}
}
Cluster genes (per condition)
top_n = 1000
for(cond in names(qvals_list)){
print(cond)
gene_fit_p = qvals_list[[cond]]
gene_fit_vals = fits_list[[cond]]
dd = readRDS(paste0("results/zonation_cond/dtw_distance_end_", cond, "_sigGenes.RDS"))
gene_fit_vals_filt = t(gene_fit_vals[,names(gene_fit_p)[gene_fit_p<=0.05]])
scale_fit = t(apply(gene_fit_vals_filt, 1, scale))
# select top genes (0 if using all)
topgenes = if(top_n==0) names(gene_fit_p) else names(head(gene_fit_p[order(gene_fit_p,
decreasing = F)],
top_n))
clust_genes = hclust(as.dist(dd[topgenes,topgenes]), method = "ward.D2")
cls = cutree(clust_genes, 4)
#ape::plot.phylo(ape::as.phylo(clust_genes), tip.color = rainbow(10)[cls])
plt_list = list()
for(i in unique(cls)){
sub_df = scale_fit[names(cls)[cls==i],]
plot_df = reshape2::melt(sub_df)
plt_list[[paste0("gene_",i)]] = ggplot(plot_df, aes(x = Var2, y = value, group = Var1))+
geom_hline(yintercept = 0, linetype = "dashed")+
geom_line(alpha = 0.1, colour = "grey69")+
stat_summary(aes(y = value, group=1), fun=mean, colour="red", geom="line",group=1)+
labs(title = paste0("gene_",i))+
theme_bw()
}
pdf(paste0("results/zonation_cond/end_gene_expression_profile_", cond, ".pdf"),
height = 4.5, width = 4.4)
print(cowplot::plot_grid(plotlist = plt_list, nrow = 2, ncol = 2, align = "hv"))
dev.off()
df_pval_cl = merge(data.frame(gene_fit_p), data.frame(cls), by = 0, all = T)
colnames(df_pval_cl) = c("gene", "pval", "cluster")
write.csv(df_pval_cl, file = paste0("results/zonation_cond/end_pval_clust_", cond, ".csv"),
col.names = T, row.names = F, quote = F)
write.csv(gene_fit_vals,
file = paste0("results/zonation_cond/end_gene_fits_", cond, ".csv"),
col.names = T, row.names = F, quote = F)
}
Calculate distances for all significant genes
for(cond in names(qvals_list)){
gene_fit_p = qvals_list[[cond]]
gene_fit_vals = fits_list[[cond]]
gene_fit_vals_filt = t(gene_fit_vals[,names(gene_fit_p)[gene_fit_p<=0.05]])
scale_fit = t(apply(gene_fit_vals_filt, 1, scale))
# dist
if(file.exists(paste0("results/zonation_cond/dtw_distance_", cond, "_sigGenes.RDS"))){
next
} else{
dd = dtw::dtwDist(scale_fit) # TAKES 4.5 HOURS
saveRDS(dd, paste0("results/zonation_cond/dtw_distance_", cond, "_sigGenes.RDS"))
}
}
Cluster genes (per condition)
top_n = 1000
for(cond in names(qvals_list)){
print(cond)
gene_fit_p = qvals_list[[cond]]
gene_fit_vals = fits_list[[cond]]
dd = readRDS(paste0("results/zonation_cond/dtw_distance_", cond, "_sigGenes.RDS"))
gene_fit_vals_filt = t(gene_fit_vals[,names(gene_fit_p)[gene_fit_p<=0.05]])
scale_fit = t(apply(gene_fit_vals_filt, 1, scale))
# select top genes (0 if using all)
topgenes = if(top_n==0) names(gene_fit_p) else names(head(gene_fit_p[order(gene_fit_p,
decreasing = F)],
top_n))
clust_genes = hclust(as.dist(dd[topgenes,topgenes]), method = "ward.D2")
cls = cutree(clust_genes, 4)
#ape::plot.phylo(ape::as.phylo(clust_genes), tip.color = rainbow(10)[cls])
plt_list = list()
for(i in unique(cls)){
sub_df = scale_fit[names(cls)[cls==i],]
plot_df = reshape2::melt(sub_df)
plt_list[[paste0("gene_",i)]] = ggplot(plot_df, aes(x = Var2, y = value, group = Var1))+
geom_hline(yintercept = 0, linetype = "dashed")+
geom_line(alpha = 0.1, colour = "grey69")+
stat_summary(aes(y = value, group=1), fun=mean, colour="red", geom="line",group=1)+
labs(title = paste0("gene_",i))+
theme_bw()
}
pdf(paste0("results/zonation_cond/hep_gene_expression_profile_", cond, ".pdf"),
height = 4.5, width = 4.4)
print(cowplot::plot_grid(plotlist = plt_list, nrow = 2, ncol = 2, align = "hv"))
dev.off()
df_pval_cl = merge(data.frame(gene_fit_p), data.frame(cls), by = 0, all = T)
colnames(df_pval_cl) = c("gene", "pval", "cluster")
write.csv(df_pval_cl, file = paste0("results/zonation_cond/hep_pval_clust_", cond, ".csv"),
col.names = T, row.names = F, quote = F)
write.csv(gene_fit_vals, file = paste0("results/zonation_cond/hep_gene_fits_", cond, ".csv"),
col.names = T, row.names = F, quote = F)
}